In [2]:
import os
# !pip install pyarrow==15.0.2
# !pip install optuna
# !pip install optuna_integration
# !pip install evaluate

# os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
# os.environ["CUDA_VISIBLE_DEVICES"] = ""
# os.environ['TORCH_CUDA_ALLOC_SYNC'] = '1'

from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer, BertTokenizerFast, TrainerCallback
from torch.utils.data import Dataset
from torch import tensor, cuda
from torch.nn.functional import softmax
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import evaluate
import optuna
from optuna.pruners import MedianPruner
from optuna.integration import TensorBoardCallback
import math

Collecting evaluate
  Downloading evaluate-0.4.2-py3-none-any.whl.metadata (9.3 kB)
Collecting datasets>=2.0.0 (from evaluate)
  Downloading datasets-2.21.0-py3-none-any.whl.metadata (21 kB)
Collecting dill (from evaluate)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from evaluate)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess (from evaluate)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Downloading evaluate-0.4.2-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading datasets-2.21.0-py3-none-any.whl (527 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m527.3/527.3 kB[0m [31m16.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1

In [3]:
class CSVDataset(Dataset):
    def __init__(self, bindings, labels=None, device='cpu'):
        self.bindings = bindings
        self.labels = labels
        self.device = device

    def __len__(self):
        return len(self.bindings["input_ids"])

    def __getitem__(self, idx):
        item = {k: tensor(v[idx]).to(self.device) for k, v in self.bindings.items()}
        if self.labels:
            item['labels'] = tensor(self.labels[idx] - 1).to(self.device)
        return item

class OptunaPruningCallback(TrainerCallback):
    def __init__(self, trial, metric_name):
        self.trial = trial
        self.metric_name = metric_name

    def on_evaluate(self, args, state, control, metrics, **kwargs):
        epoch = math.ceil(state.epoch)
        value = metrics.get(self.metric_name)
        self.trial.report(value, epoch)
        if self.trial.should_prune():
            raise optuna.TrialPruned()

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

In [4]:
model_name = "onlplab/alephbert-base"
num_labels = 20
output_dir = "test_trainer"
eval_strategy = "epoch"
csv_file = "updated_final.csv"
device = 'cuda:0' if cuda.is_available() else 'cpu'
metric = evaluate.load("accuracy")
tokenizer = BertTokenizerFast.from_pretrained(model_name)

ds = pd.read_csv(csv_file, sep='\t')
ds = ds.dropna()

X = list("[CLS]" + ds['sentence'] + "[SEP]" + ds['part'] + "[SEP]")
y = list(ds['category'].astype(int))

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y)
X_train_tokenized = tokenizer(X_train, padding=True, truncation=True, max_length=512)
X_test_tokenized = tokenizer(X_test, padding=True, truncation=True, max_length=512)

train_ds = CSVDataset(X_train_tokenized, y_train, device=device)
test_ds = CSVDataset(X_test_tokenized, y_test, device=device)

print(f'{ds.shape[0]} entries in dataset')
ds.head()

Downloading builder script:   0%|          | 0.00/4.20k [00:00<?, ?B/s]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/288 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/545k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/565 [00:00<?, ?B/s]

987 entries in dataset


Unnamed: 0,sentence,part,category
0,"החקלאי יצא לרסס את מטע הזיתים שלו , ומשלא שב ל...",מטע הזיתים,12.0
1,"החקלאי יצא לרסס את מטע הזיתים שלו , ומשלא שב ל...",קריאות הטלפון,11.0
2,"החקלאי יצא לרסס את מטע הזיתים שלו , ומשלא שב ל...",רוח חיים,18.0
3,"גופתו של חקלאי בן חמישים ושלוש , תושב כפר סבא ...",בן חמישים ושלוש,17.0
4,"גופתו של חקלאי בן חמישים ושלוש , תושב כפר סבא ...",תושב כפר סבא,16.0


In [6]:
def optimize_hyperparameters():
    print(f"Optimizing hyperparameters for {model_name} on {device} with {csv_file} dataset")

    def objective(trial):
        model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=20)
        learning_rate=trial.suggest_float("learning_rate", 1e-5, 5e-5, log=True)
        weight_decay=trial.suggest_float("weight_decay", 1e-4, 1e-1)
        per_device_train_batch_size=trial.suggest_categorical("per_device_train_batch_size", [8, 16, 32, 64, 128])
        warmup_steps=trial.suggest_int("warmup_steps", 0, 500)
        warmup_ratio=trial.suggest_float("warmup_ratio", 0.0, 0.2)
        num_train_epochs=trial.suggest_int("num_train_epochs", 2, 10)

        training_args = TrainingArguments(
            output_dir="./results",
            learning_rate=learning_rate,
            weight_decay=weight_decay,
            per_device_train_batch_size=per_device_train_batch_size,
            warmup_steps=warmup_steps,
            warmup_ratio=warmup_ratio,
            num_train_epochs=num_train_epochs,
            eval_strategy="epoch",
            logging_strategy="epoch",
            save_strategy="no",
            save_total_limit=2,
        )

        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=train_ds,
            eval_dataset=test_ds,
            tokenizer=tokenizer,
            compute_metrics=compute_metrics,
            callbacks=[OptunaPruningCallback(trial, "eval_accuracy")],
        )

        trainer.train()
        out = trainer.evaluate()
        return out['eval_accuracy']

    study = optuna.create_study(direction="maximize", pruner=MedianPruner())
    study.optimize(objective, n_trials=300)
    print("Best Hyperparameters:\n", study.best_params)
    return study

study = optimize_hyperparameters()

[I 2024-08-28 00:23:16,637] A new study created in memory with name: no-name-11db6f31-c266-4a40-b8d8-ab2604577ead


Optimizing hyperparameters for onlplab/alephbert-base on cuda:0 with updated_final.csv dataset


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,3.0163,2.909772,0.116162
2,2.7602,2.557595,0.227273
3,2.4451,2.32643,0.20202
4,2.2777,2.249166,0.217172
5,2.1568,2.184335,0.222222
6,1.9792,2.131437,0.282828
7,1.7165,2.116848,0.373737


[I 2024-08-28 00:24:47,336] Trial 0 finished with value: 0.37373737373737376 and parameters: {'learning_rate': 1.1538464302436588e-05, 'weight_decay': 0.034981821374836304, 'per_device_train_batch_size': 16, 'warmup_steps': 358, 'warmup_ratio': 0.08213525023749946, 'num_train_epochs': 7}. Best is trial 0 with value: 0.37373737373737376.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,3.0797,3.061753,0.020202
2,3.0501,3.012738,0.030303
3,2.9816,2.933117,0.141414


[I 2024-08-28 00:25:20,612] Trial 1 finished with value: 0.1414141414141414 and parameters: {'learning_rate': 4.885093066047971e-05, 'weight_decay': 0.021892002222516357, 'per_device_train_batch_size': 128, 'warmup_steps': 416, 'warmup_ratio': 0.06308948874211906, 'num_train_epochs': 3}. Best is trial 0 with value: 0.37373737373737376.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.9459,2.660593,0.212121
2,2.4524,2.294347,0.262626
3,2.2295,2.183706,0.267677


[I 2024-08-28 00:26:10,010] Trial 2 finished with value: 0.2676767676767677 and parameters: {'learning_rate': 1.1114453160463443e-05, 'weight_decay': 0.008156820106410785, 'per_device_train_batch_size': 8, 'warmup_steps': 358, 'warmup_ratio': 0.15557706058098353, 'num_train_epochs': 3}. Best is trial 0 with value: 0.37373737373737376.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.9078,2.579007,0.191919
2,2.3889,2.27745,0.237374


[I 2024-08-28 00:26:38,039] Trial 3 finished with value: 0.23737373737373738 and parameters: {'learning_rate': 1.1488343525557529e-05, 'weight_decay': 0.039257626601804395, 'per_device_train_batch_size': 16, 'warmup_steps': 90, 'warmup_ratio': 0.08311584906721797, 'num_train_epochs': 2}. Best is trial 0 with value: 0.37373737373737376.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.6902,2.343354,0.262626
2,2.2254,2.206045,0.292929


[I 2024-08-28 00:27:11,321] Trial 4 finished with value: 0.29292929292929293 and parameters: {'learning_rate': 1.3702221854940181e-05, 'weight_decay': 0.0801138599137642, 'per_device_train_batch_size': 8, 'warmup_steps': 114, 'warmup_ratio': 0.19101375424916436, 'num_train_epochs': 2}. Best is trial 0 with value: 0.37373737373737376.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 00:27:22,966] Trial 5 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 00:27:34,665] Trial 6 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.7997,2.511158,0.207071
2,2.3632,2.249923,0.257576
3,2.1274,2.105952,0.29798


[I 2024-08-28 00:28:15,606] Trial 7 finished with value: 0.29797979797979796 and parameters: {'learning_rate': 3.3156189872181284e-05, 'weight_decay': 0.01945079172718559, 'per_device_train_batch_size': 16, 'warmup_steps': 246, 'warmup_ratio': 0.172924548103451, 'num_train_epochs': 3}. Best is trial 0 with value: 0.37373737373737376.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 00:28:27,954] Trial 8 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 00:28:40,571] Trial 9 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 00:28:53,451] Trial 10 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.8033,2.518811,0.207071
2,2.3686,2.25437,0.262626
3,2.1349,2.112985,0.287879
4,1.7275,1.871661,0.409091
5,1.1354,1.837832,0.454545
6,0.6664,1.829395,0.5


[I 2024-08-28 00:30:12,554] Trial 11 finished with value: 0.5 and parameters: {'learning_rate': 2.764249748934433e-05, 'weight_decay': 0.05142542119344435, 'per_device_train_batch_size': 16, 'warmup_steps': 211, 'warmup_ratio': 0.19650570294927658, 'num_train_epochs': 6}. Best is trial 11 with value: 0.5.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 00:30:26,658] Trial 12 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.7996,2.510794,0.207071
2,2.363,2.249713,0.257576
3,2.1271,2.105617,0.29798
4,1.7036,1.857366,0.424242
5,1.1096,1.840044,0.459596
6,0.6731,1.902549,0.479798
7,0.4236,2.059726,0.454545
8,0.3003,2.080589,0.464646


[I 2024-08-28 00:32:11,400] Trial 13 finished with value: 0.46464646464646464 and parameters: {'learning_rate': 2.5103522174159235e-05, 'weight_decay': 0.042745676317803086, 'per_device_train_batch_size': 16, 'warmup_steps': 186, 'warmup_ratio': 0.144886022980873, 'num_train_epochs': 8}. Best is trial 11 with value: 0.5.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.5069,2.285763,0.267677
2,2.1356,2.107456,0.287879
3,1.776,2.002273,0.353535
4,1.3367,1.953462,0.409091


[I 2024-08-28 00:33:16,729] Trial 14 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.8866,2.764055,0.212121
2,2.5901,2.414538,0.287879
3,2.3337,2.258609,0.257576


[I 2024-08-28 00:34:03,885] Trial 15 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.8616,2.546262,0.227273


[I 2024-08-28 00:34:30,850] Trial 16 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 00:34:45,054] Trial 17 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.6508,2.309011,0.247475
2,2.1931,2.106297,0.313131
3,1.8133,1.968368,0.343434
4,1.3438,1.90257,0.419192


[I 2024-08-28 00:35:50,376] Trial 18 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 00:36:02,625] Trial 19 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 00:36:15,457] Trial 20 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 00:36:29,599] Trial 21 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 00:36:43,703] Trial 22 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 00:36:57,832] Trial 23 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.7346,2.394841,0.222222
2,2.2803,2.173609,0.292929
3,1.9741,1.950199,0.393939
4,1.3151,1.837404,0.449495
5,0.7636,1.875408,0.474747
6,0.4543,1.99404,0.479798
7,0.3191,2.027412,0.474747


[I 2024-08-28 00:38:29,711] Trial 24 finished with value: 0.47474747474747475 and parameters: {'learning_rate': 2.9334555432426744e-05, 'weight_decay': 0.05085881199381166, 'per_device_train_batch_size': 16, 'warmup_steps': 136, 'warmup_ratio': 0.13433152899761266, 'num_train_epochs': 7}. Best is trial 11 with value: 0.5.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.5716,2.303679,0.262626
2,2.063,1.90534,0.393939
3,1.3582,1.859774,0.439394
4,0.7639,1.876787,0.464646
5,0.396,1.94739,0.515152
6,0.2575,2.01679,0.5


[I 2024-08-28 00:40:04,803] Trial 25 finished with value: 0.5 and parameters: {'learning_rate': 3.082312560189922e-05, 'weight_decay': 0.05270596990111001, 'per_device_train_batch_size': 8, 'warmup_steps': 116, 'warmup_ratio': 0.1403839458564463, 'num_train_epochs': 6}. Best is trial 11 with value: 0.5.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.5817,2.321382,0.267677
2,2.1157,2.040314,0.328283
3,1.4398,1.891312,0.444444
4,0.8008,1.863928,0.464646
5,0.4465,2.120994,0.439394


[I 2024-08-28 00:41:39,507] Trial 26 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4112,2.197299,0.262626
2,1.7529,1.838782,0.40404
3,0.9924,1.966671,0.469697
4,0.5152,2.077127,0.459596
5,0.267,2.185363,0.479798


[I 2024-08-28 00:42:59,270] Trial 27 finished with value: 0.4797979797979798 and parameters: {'learning_rate': 4.247861502444647e-05, 'weight_decay': 0.06144308442202997, 'per_device_train_batch_size': 8, 'warmup_steps': 42, 'warmup_ratio': 0.13281749640155413, 'num_train_epochs': 5}. Best is trial 11 with value: 0.5.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.3603,2.176579,0.287879
2,1.7234,1.814257,0.409091
3,1.0225,1.953723,0.434343
4,0.5667,2.003295,0.459596


[I 2024-08-28 00:44:03,712] Trial 28 finished with value: 0.4595959595959596 and parameters: {'learning_rate': 4.4265997241199995e-05, 'weight_decay': 0.08054760470768622, 'per_device_train_batch_size': 8, 'warmup_steps': 14, 'warmup_ratio': 0.18112714937349592, 'num_train_epochs': 4}. Best is trial 11 with value: 0.5.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4568,2.229466,0.272727
2,1.8559,1.863053,0.383838
3,1.1171,1.947964,0.454545
4,0.6048,2.097703,0.459596


[I 2024-08-28 00:45:22,368] Trial 29 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4386,2.197499,0.272727
2,1.8243,1.781283,0.474747
3,1.0261,1.88276,0.474747
4,0.5115,1.871947,0.479798


[I 2024-08-28 00:46:26,555] Trial 30 finished with value: 0.4797979797979798 and parameters: {'learning_rate': 4.9790121293295445e-05, 'weight_decay': 0.09017638073456444, 'per_device_train_batch_size': 8, 'warmup_steps': 85, 'warmup_ratio': 0.15774787956798392, 'num_train_epochs': 4}. Best is trial 11 with value: 0.5.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4657,2.224372,0.262626
2,1.8388,1.839874,0.409091
3,1.0466,1.910734,0.39899


[I 2024-08-28 00:47:29,888] Trial 31 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4844,2.242731,0.262626
2,1.92,1.808973,0.439394
3,1.1054,1.888785,0.479798
4,0.5355,2.119989,0.439394
5,0.2836,2.194399,0.439394


[I 2024-08-28 00:48:49,718] Trial 32 finished with value: 0.4393939393939394 and parameters: {'learning_rate': 4.4884344659097255e-05, 'weight_decay': 0.0892363502383049, 'per_device_train_batch_size': 8, 'warmup_steps': 92, 'warmup_ratio': 0.15127669332497262, 'num_train_epochs': 5}. Best is trial 11 with value: 0.5.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4289,2.228066,0.272727
2,1.8956,1.894536,0.373737
3,1.232,1.919942,0.429293


[I 2024-08-28 00:49:52,994] Trial 33 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4833,2.242376,0.262626
2,1.9182,1.816479,0.429293
3,1.1061,1.89252,0.474747
4,0.5376,2.103169,0.429293
5,0.2909,2.180921,0.434343


[I 2024-08-28 00:51:12,767] Trial 34 finished with value: 0.43434343434343436 and parameters: {'learning_rate': 4.395882225002699e-05, 'weight_decay': 0.0763047531563392, 'per_device_train_batch_size': 8, 'warmup_steps': 89, 'warmup_ratio': 0.19848067258101432, 'num_train_epochs': 5}. Best is trial 11 with value: 0.5.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.5207,2.285879,0.272727
2,2.0099,1.908649,0.368687
3,1.2931,1.852201,0.439394


[I 2024-08-28 00:52:01,530] Trial 35 finished with value: 0.4393939393939394 and parameters: {'learning_rate': 3.77433285653333e-05, 'weight_decay': 0.08934359225018777, 'per_device_train_batch_size': 8, 'warmup_steps': 108, 'warmup_ratio': 0.12765228453160826, 'num_train_epochs': 3}. Best is trial 11 with value: 0.5.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.6105,2.305206,0.242424


[I 2024-08-28 00:52:33,795] Trial 36 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 00:52:45,397] Trial 37 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.3362,2.124876,0.287879
2,1.7265,1.893458,0.414141


[I 2024-08-28 00:53:18,753] Trial 38 finished with value: 0.41414141414141414 and parameters: {'learning_rate': 4.732386115723811e-05, 'weight_decay': 0.006244899496725527, 'per_device_train_batch_size': 8, 'warmup_steps': 13, 'warmup_ratio': 0.15595498604656458, 'num_train_epochs': 2}. Best is trial 11 with value: 0.5.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4994,2.239634,0.277778
2,1.9069,1.824359,0.40404
3,1.1703,1.799901,0.464646


[I 2024-08-28 00:54:07,401] Trial 39 finished with value: 0.46464646464646464 and parameters: {'learning_rate': 4.1444103453385376e-05, 'weight_decay': 0.051814315794277865, 'per_device_train_batch_size': 8, 'warmup_steps': 90, 'warmup_ratio': 0.18728667633592264, 'num_train_epochs': 3}. Best is trial 11 with value: 0.5.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 00:54:19,239] Trial 40 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.5711,2.299274,0.277778
2,2.0955,1.942831,0.373737
3,1.3959,1.858309,0.444444
4,0.7843,1.883643,0.464646
5,0.4194,2.112288,0.474747


[I 2024-08-28 00:55:53,926] Trial 41 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 00:56:06,181] Trial 42 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 00:56:19,043] Trial 43 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 00:56:35,863] Trial 44 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 00:56:50,096] Trial 45 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 00:57:01,669] Trial 46 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 00:57:15,860] Trial 47 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 00:57:28,126] Trial 48 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.5064,2.252532,0.272727
2,1.9723,1.824631,0.419192


[I 2024-08-28 00:58:16,411] Trial 49 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 00:58:30,798] Trial 50 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 00:58:44,920] Trial 51 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 00:58:59,018] Trial 52 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 00:59:13,108] Trial 53 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 00:59:27,215] Trial 54 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 00:59:39,986] Trial 55 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 00:59:54,088] Trial 56 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 01:00:08,495] Trial 57 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 01:00:25,295] Trial 58 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.6209,2.299317,0.262626


[I 2024-08-28 01:00:52,408] Trial 59 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 01:01:04,888] Trial 60 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4861,2.229917,0.262626
2,1.9103,1.802088,0.409091
3,1.1669,1.770137,0.474747


[I 2024-08-28 01:01:53,799] Trial 61 finished with value: 0.47474747474747475 and parameters: {'learning_rate': 4.116181672984696e-05, 'weight_decay': 0.051772655872401055, 'per_device_train_batch_size': 8, 'warmup_steps': 96, 'warmup_ratio': 0.1903784486787332, 'num_train_epochs': 3}. Best is trial 11 with value: 0.5.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.513,2.249066,0.262626
2,1.9527,1.815261,0.434343
3,1.2082,1.794215,0.505051


[I 2024-08-28 01:02:42,713] Trial 62 finished with value: 0.5050505050505051 and parameters: {'learning_rate': 4.721491519103951e-05, 'weight_decay': 0.04515859790155174, 'per_device_train_batch_size': 8, 'warmup_steps': 103, 'warmup_ratio': 0.19887258800100943, 'num_train_epochs': 3}. Best is trial 62 with value: 0.5050505050505051.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.5129,2.248935,0.262626


[I 2024-08-28 01:03:15,059] Trial 63 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 01:03:31,860] Trial 64 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4929,2.237252,0.262626
2,1.9364,1.790457,0.429293
3,1.1474,1.770639,0.459596


[I 2024-08-28 01:04:20,684] Trial 65 finished with value: 0.4595959595959596 and parameters: {'learning_rate': 4.982073802611961e-05, 'weight_decay': 0.09986585429247613, 'per_device_train_batch_size': 8, 'warmup_steps': 124, 'warmup_ratio': 0.19481487303860337, 'num_train_epochs': 3}. Best is trial 62 with value: 0.5050505050505051.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 01:04:37,445] Trial 66 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.3742,2.138968,0.282828
2,1.7546,1.896119,0.393939


[I 2024-08-28 01:05:10,737] Trial 67 finished with value: 0.3939393939393939 and parameters: {'learning_rate': 4.588317129023097e-05, 'weight_decay': 0.035570118761653044, 'per_device_train_batch_size': 8, 'warmup_steps': 30, 'warmup_ratio': 0.1649367078321855, 'num_train_epochs': 2}. Best is trial 62 with value: 0.5050505050505051.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.5432,2.297531,0.267677


[I 2024-08-28 01:05:43,403] Trial 68 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4209,2.239008,0.262626
2,1.7948,1.78358,0.40404


[I 2024-08-28 01:06:31,415] Trial 69 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.5455,2.311918,0.272727


[I 2024-08-28 01:07:03,730] Trial 70 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 01:07:15,287] Trial 71 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.5962,2.307239,0.272727


[I 2024-08-28 01:07:47,896] Trial 72 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 01:08:00,855] Trial 73 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.6229,2.299552,0.262626


[I 2024-08-28 01:08:27,876] Trial 74 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.5714,2.337245,0.267677


[I 2024-08-28 01:09:00,297] Trial 75 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 01:09:17,657] Trial 76 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 01:09:32,042] Trial 77 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.3629,2.128505,0.282828
2,1.6934,1.775252,0.454545
3,1.0153,1.771711,0.494949


[I 2024-08-28 01:10:20,955] Trial 78 finished with value: 0.494949494949495 and parameters: {'learning_rate': 4.804484429764023e-05, 'weight_decay': 0.05314866841670613, 'per_device_train_batch_size': 8, 'warmup_steps': 26, 'warmup_ratio': 0.1523700617093168, 'num_train_epochs': 3}. Best is trial 62 with value: 0.5050505050505051.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4434,2.222124,0.267677
2,1.8494,1.807471,0.424242
3,1.1521,1.805775,0.489899


[I 2024-08-28 01:11:10,160] Trial 79 finished with value: 0.4898989898989899 and parameters: {'learning_rate': 4.723356469683871e-05, 'weight_decay': 0.052886952293962806, 'per_device_train_batch_size': 8, 'warmup_steps': 51, 'warmup_ratio': 0.15214401674399902, 'num_train_epochs': 3}. Best is trial 62 with value: 0.5050505050505051.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4036,2.198443,0.277778
2,1.811,1.810474,0.419192
3,1.0955,1.924824,0.459596
4,0.6204,1.985226,0.449495


[I 2024-08-28 01:12:14,563] Trial 80 finished with value: 0.4494949494949495 and parameters: {'learning_rate': 4.752217993827169e-05, 'weight_decay': 0.06568056449318314, 'per_device_train_batch_size': 8, 'warmup_steps': 31, 'warmup_ratio': 0.13025520745919533, 'num_train_epochs': 4}. Best is trial 62 with value: 0.5050505050505051.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4336,2.216646,0.272727


[I 2024-08-28 01:12:47,012] Trial 81 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.367,2.206324,0.277778
2,1.7682,1.857823,0.439394


[I 2024-08-28 01:13:35,325] Trial 82 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4662,2.250662,0.262626
2,1.8289,1.792517,0.429293
3,1.0748,1.791244,0.479798


[I 2024-08-28 01:14:24,412] Trial 83 finished with value: 0.4797979797979798 and parameters: {'learning_rate': 4.990080164570017e-05, 'weight_decay': 0.061616000760608323, 'per_device_train_batch_size': 8, 'warmup_steps': 65, 'warmup_ratio': 0.15952334083887815, 'num_train_epochs': 3}. Best is trial 62 with value: 0.5050505050505051.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.3268,2.158945,0.282828


[I 2024-08-28 01:14:56,809] Trial 84 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4371,2.24672,0.262626
2,1.8203,1.80187,0.419192
3,1.1099,1.787135,0.459596


[I 2024-08-28 01:15:45,951] Trial 85 finished with value: 0.4595959595959596 and parameters: {'learning_rate': 4.5951588382933515e-05, 'weight_decay': 0.06927952917625403, 'per_device_train_batch_size': 8, 'warmup_steps': 65, 'warmup_ratio': 0.15402948210846784, 'num_train_epochs': 3}. Best is trial 62 with value: 0.5050505050505051.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.3965,2.194927,0.272727
2,1.8049,1.815702,0.40404
3,1.0953,1.923426,0.464646
4,0.622,1.9904,0.454545


[I 2024-08-28 01:16:50,488] Trial 86 finished with value: 0.45454545454545453 and parameters: {'learning_rate': 4.774479954412361e-05, 'weight_decay': 0.059009419098680044, 'per_device_train_batch_size': 8, 'warmup_steps': 28, 'warmup_ratio': 0.14803504041510346, 'num_train_epochs': 4}. Best is trial 62 with value: 0.5050505050505051.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 01:17:02,778] Trial 87 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.6268,2.313378,0.272727


[I 2024-08-28 01:17:35,305] Trial 88 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 01:17:46,879] Trial 89 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4408,2.195239,0.262626
2,1.8367,1.776165,0.434343


[I 2024-08-28 01:18:34,938] Trial 90 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4618,2.261295,0.262626


[I 2024-08-28 01:19:07,506] Trial 91 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 01:19:24,374] Trial 92 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4745,2.218372,0.262626
2,1.8853,1.796015,0.424242
3,1.1641,1.758979,0.484848


[I 2024-08-28 01:20:13,745] Trial 93 finished with value: 0.48484848484848486 and parameters: {'learning_rate': 4.0781941287877864e-05, 'weight_decay': 0.052383297384674045, 'per_device_train_batch_size': 8, 'warmup_steps': 85, 'warmup_ratio': 0.1786258023255921, 'num_train_epochs': 3}. Best is trial 62 with value: 0.5050505050505051.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.3575,2.184747,0.272727
2,1.8016,1.835973,0.409091
3,1.1446,1.817263,0.469697


[I 2024-08-28 01:21:02,644] Trial 94 finished with value: 0.4696969696969697 and parameters: {'learning_rate': 4.6115107544674075e-05, 'weight_decay': 0.0579974499963787, 'per_device_train_batch_size': 8, 'warmup_steps': 13, 'warmup_ratio': 0.1620618119634807, 'num_train_epochs': 3}. Best is trial 62 with value: 0.5050505050505051.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 01:21:15,754] Trial 95 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4594,2.206817,0.262626
2,1.8471,1.840801,0.419192


[I 2024-08-28 01:21:49,430] Trial 96 finished with value: 0.41919191919191917 and parameters: {'learning_rate': 4.7982783456859456e-05, 'weight_decay': 0.05395366953995067, 'per_device_train_batch_size': 8, 'warmup_steps': 85, 'warmup_ratio': 0.1500110605154257, 'num_train_epochs': 2}. Best is trial 62 with value: 0.5050505050505051.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.5238,2.274318,0.272727


[I 2024-08-28 01:22:21,845] Trial 97 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.5094,2.304776,0.262626


[I 2024-08-28 01:22:54,272] Trial 98 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4426,2.238426,0.262626
2,1.8296,1.769248,0.444444


[I 2024-08-28 01:23:42,374] Trial 99 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 01:23:54,583] Trial 100 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4837,2.227646,0.267677
2,1.9038,1.800729,0.424242
3,1.1624,1.767681,0.479798


[I 2024-08-28 01:24:44,010] Trial 101 finished with value: 0.4797979797979798 and parameters: {'learning_rate': 4.127107260529317e-05, 'weight_decay': 0.05334723021953509, 'per_device_train_batch_size': 8, 'warmup_steps': 94, 'warmup_ratio': 0.1961146582633816, 'num_train_epochs': 3}. Best is trial 62 with value: 0.5050505050505051.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.5513,2.283441,0.277778


[I 2024-08-28 01:25:16,617] Trial 102 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4205,2.239632,0.267677
2,1.7939,1.798586,0.419192


[I 2024-08-28 01:26:04,739] Trial 103 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.5291,2.303531,0.267677


[I 2024-08-28 01:26:37,254] Trial 104 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.481,2.279664,0.262626


[I 2024-08-28 01:27:09,655] Trial 105 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 01:27:26,639] Trial 106 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4632,2.253391,0.272727


[I 2024-08-28 01:27:53,731] Trial 107 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 01:28:05,328] Trial 108 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.5071,2.25333,0.272727
2,1.9719,1.858499,0.414141


[I 2024-08-28 01:28:38,808] Trial 109 finished with value: 0.41414141414141414 and parameters: {'learning_rate': 4.7027845169412546e-05, 'weight_decay': 0.047457385429907, 'per_device_train_batch_size': 8, 'warmup_steps': 133, 'warmup_ratio': 0.15718172116032983, 'num_train_epochs': 2}. Best is trial 62 with value: 0.5050505050505051.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 01:28:55,736] Trial 110 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4851,2.229014,0.262626
2,1.9074,1.802294,0.429293
3,1.1655,1.768437,0.479798


[I 2024-08-28 01:29:44,689] Trial 111 finished with value: 0.4797979797979798 and parameters: {'learning_rate': 4.11416227330256e-05, 'weight_decay': 0.0532237574100113, 'per_device_train_batch_size': 8, 'warmup_steps': 95, 'warmup_ratio': 0.19108581104267267, 'num_train_epochs': 3}. Best is trial 62 with value: 0.5050505050505051.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 01:30:01,690] Trial 112 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.5111,2.257772,0.277778
2,1.9867,1.879354,0.414141
3,1.2969,1.795903,0.469697


[I 2024-08-28 01:30:50,591] Trial 113 finished with value: 0.4696969696969697 and parameters: {'learning_rate': 3.28174979654238e-05, 'weight_decay': 0.04065291019597911, 'per_device_train_batch_size': 8, 'warmup_steps': 96, 'warmup_ratio': 0.19691089728464475, 'num_train_epochs': 3}. Best is trial 62 with value: 0.5050505050505051.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4571,2.228928,0.267677
2,1.8678,1.809737,0.429293
3,1.1784,1.78694,0.489899


[I 2024-08-28 01:31:39,510] Trial 114 finished with value: 0.4898989898989899 and parameters: {'learning_rate': 4.521803562446669e-05, 'weight_decay': 0.08501785755871007, 'per_device_train_batch_size': 8, 'warmup_steps': 57, 'warmup_ratio': 0.18791340481692698, 'num_train_epochs': 3}. Best is trial 62 with value: 0.5050505050505051.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4181,2.208971,0.262626


[I 2024-08-28 01:32:11,951] Trial 115 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 01:32:29,015] Trial 116 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4252,2.180494,0.262626
2,1.8133,1.79707,0.444444
3,1.1286,1.774237,0.484848


[I 2024-08-28 01:33:18,091] Trial 117 finished with value: 0.48484848484848486 and parameters: {'learning_rate': 4.090993318120361e-05, 'weight_decay': 0.08934717209401645, 'per_device_train_batch_size': 8, 'warmup_steps': 52, 'warmup_ratio': 0.1734750463094894, 'num_train_epochs': 3}. Best is trial 62 with value: 0.5050505050505051.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4568,2.230365,0.262626
2,1.9012,1.876418,0.419192


[I 2024-08-28 01:34:05,937] Trial 118 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4687,2.296248,0.267677


[I 2024-08-28 01:34:38,509] Trial 119 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.5496,2.299719,0.272727


[I 2024-08-28 01:35:02,807] Trial 120 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4539,2.252292,0.267677
2,1.8617,1.820686,0.419192


[I 2024-08-28 01:35:50,684] Trial 121 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4506,2.252943,0.262626


[I 2024-08-28 01:36:23,215] Trial 122 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4601,2.24685,0.267677
2,1.8528,1.797485,0.429293


[I 2024-08-28 01:37:11,081] Trial 123 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 01:37:28,162] Trial 124 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.3866,2.149305,0.277778
2,1.7386,1.776606,0.439394
3,1.0627,1.78208,0.489899


[I 2024-08-28 01:38:17,138] Trial 125 finished with value: 0.4898989898989899 and parameters: {'learning_rate': 4.465112012284044e-05, 'weight_decay': 0.08895264542778188, 'per_device_train_batch_size': 8, 'warmup_steps': 35, 'warmup_ratio': 0.15869134263044013, 'num_train_epochs': 3}. Best is trial 62 with value: 0.5050505050505051.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4022,2.199002,0.277778


[I 2024-08-28 01:38:49,764] Trial 126 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.3624,2.194398,0.277778
2,1.7448,1.84455,0.424242


[I 2024-08-28 01:39:37,902] Trial 127 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 01:39:54,709] Trial 128 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 01:40:11,535] Trial 129 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4528,2.204257,0.262626
2,1.847,1.775216,0.434343


[I 2024-08-28 01:40:59,541] Trial 130 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.5257,2.295666,0.267677


[I 2024-08-28 01:41:31,852] Trial 131 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.5775,2.338301,0.267677


[I 2024-08-28 01:42:04,269] Trial 132 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 01:42:21,135] Trial 133 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.439,2.193333,0.262626
2,1.8238,1.777818,0.454545
3,1.1095,1.769467,0.5


[I 2024-08-28 01:43:10,230] Trial 134 finished with value: 0.5 and parameters: {'learning_rate': 4.371558970947992e-05, 'weight_decay': 0.04394509140917894, 'per_device_train_batch_size': 8, 'warmup_steps': 64, 'warmup_ratio': 0.18132550468230413, 'num_train_epochs': 3}. Best is trial 62 with value: 0.5050505050505051.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4271,2.214028,0.262626


[I 2024-08-28 01:43:42,823] Trial 135 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 01:43:55,110] Trial 136 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4605,2.207201,0.262626
2,1.8529,1.772337,0.434343


[I 2024-08-28 01:44:43,200] Trial 137 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4499,2.24187,0.262626
2,1.8199,1.795209,0.424242


[I 2024-08-28 01:45:31,214] Trial 138 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 01:45:42,822] Trial 139 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4357,2.1929,0.262626
2,1.821,1.760389,0.449495


[I 2024-08-28 01:46:31,298] Trial 140 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.5384,2.306209,0.267677


[I 2024-08-28 01:47:03,758] Trial 141 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 01:47:20,673] Trial 142 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 01:47:37,588] Trial 143 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4493,2.202184,0.262626
2,1.8343,1.76822,0.449495
3,1.1016,1.764972,0.484848


[I 2024-08-28 01:48:26,701] Trial 144 finished with value: 0.48484848484848486 and parameters: {'learning_rate': 4.538424392873432e-05, 'weight_decay': 0.05833447708545049, 'per_device_train_batch_size': 8, 'warmup_steps': 73, 'warmup_ratio': 0.19523936559755672, 'num_train_epochs': 3}. Best is trial 62 with value: 0.5050505050505051.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4865,2.241103,0.267677
2,1.8979,1.805468,0.449495
3,1.1865,1.777521,0.474747


[I 2024-08-28 01:49:15,707] Trial 145 finished with value: 0.47474747474747475 and parameters: {'learning_rate': 4.5724712255774276e-05, 'weight_decay': 0.06076901182654294, 'per_device_train_batch_size': 8, 'warmup_steps': 77, 'warmup_ratio': 0.039119382279292014, 'num_train_epochs': 3}. Best is trial 62 with value: 0.5050505050505051.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 01:49:28,555] Trial 146 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.6125,2.310579,0.267677


[I 2024-08-28 01:50:01,103] Trial 147 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.3524,2.152256,0.277778


[I 2024-08-28 01:50:33,662] Trial 148 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4933,2.290116,0.277778


[I 2024-08-28 01:51:06,037] Trial 149 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 01:51:22,915] Trial 150 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.5912,2.29985,0.272727


[I 2024-08-28 01:51:55,553] Trial 151 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 01:52:12,425] Trial 152 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.5971,2.307441,0.272727


[I 2024-08-28 01:52:44,916] Trial 153 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4453,2.24743,0.262626
2,1.831,1.799928,0.419192


[I 2024-08-28 01:53:33,014] Trial 154 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 01:53:49,846] Trial 155 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4893,2.233168,0.262626
2,1.9183,1.804146,0.414141
3,1.1646,1.769259,0.484848


[I 2024-08-28 01:54:39,224] Trial 156 finished with value: 0.48484848484848486 and parameters: {'learning_rate': 4.239071409608744e-05, 'weight_decay': 0.08424936226319701, 'per_device_train_batch_size': 8, 'warmup_steps': 102, 'warmup_ratio': 0.14776826810920407, 'num_train_epochs': 3}. Best is trial 62 with value: 0.5050505050505051.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.5336,2.267952,0.262626
2,2.0108,1.830321,0.414141
3,1.2486,1.822647,0.489899


[I 2024-08-28 01:55:28,196] Trial 157 finished with value: 0.4898989898989899 and parameters: {'learning_rate': 4.670735467746174e-05, 'weight_decay': 0.0847608388932504, 'per_device_train_batch_size': 8, 'warmup_steps': 123, 'warmup_ratio': 0.14849544280234026, 'num_train_epochs': 3}. Best is trial 62 with value: 0.5050505050505051.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 01:55:42,417] Trial 158 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.5025,2.248168,0.267677
2,1.9565,1.800694,0.424242


[I 2024-08-28 01:56:30,485] Trial 159 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 01:56:42,765] Trial 160 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4962,2.24102,0.262626


[I 2024-08-28 01:57:15,215] Trial 161 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 01:57:32,053] Trial 162 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.492,2.236242,0.262626


[I 2024-08-28 01:58:04,762] Trial 163 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4619,2.256489,0.262626
2,1.8813,1.825945,0.419192


[I 2024-08-28 01:58:52,777] Trial 164 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4679,2.253147,0.262626
2,1.8433,1.79136,0.419192


[I 2024-08-28 01:59:40,685] Trial 165 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 01:59:58,010] Trial 166 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4123,2.169459,0.267677


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4123,2.169459,0.267677
2,1.781,1.77651,0.439394
3,1.0926,1.771362,0.484848


[I 2024-08-28 02:00:46,952] Trial 167 finished with value: 0.48484848484848486 and parameters: {'learning_rate': 4.404351281621551e-05, 'weight_decay': 0.08588285554068292, 'per_device_train_batch_size': 8, 'warmup_steps': 48, 'warmup_ratio': 0.16857580958607699, 'num_train_epochs': 3}. Best is trial 62 with value: 0.5050505050505051.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.3964,2.197649,0.277778


[I 2024-08-28 02:01:19,603] Trial 168 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 02:01:31,177] Trial 169 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.3407,2.135717,0.282828


[I 2024-08-28 02:02:03,717] Trial 170 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4486,2.24857,0.262626
2,1.8355,1.802511,0.414141


[I 2024-08-28 02:02:51,806] Trial 171 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 02:03:08,751] Trial 172 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.3949,2.155853,0.277778
2,1.7514,1.773288,0.444444
3,1.0721,1.779572,0.484848


[I 2024-08-28 02:03:57,790] Trial 173 finished with value: 0.48484848484848486 and parameters: {'learning_rate': 4.4476197815164205e-05, 'weight_decay': 0.08964141513748501, 'per_device_train_batch_size': 8, 'warmup_steps': 39, 'warmup_ratio': 0.1613894997750279, 'num_train_epochs': 3}. Best is trial 62 with value: 0.5050505050505051.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4167,2.206002,0.272727
2,1.8182,1.826638,0.419192


[I 2024-08-28 02:04:45,714] Trial 174 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 02:05:02,884] Trial 175 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.403,2.160743,0.267677
2,1.7562,1.759552,0.439394
3,1.0611,1.774385,0.494949


[I 2024-08-28 02:05:51,934] Trial 176 finished with value: 0.494949494949495 and parameters: {'learning_rate': 4.6271433846438055e-05, 'weight_decay': 0.0928962311152364, 'per_device_train_batch_size': 8, 'warmup_steps': 45, 'warmup_ratio': 0.17644742573354327, 'num_train_epochs': 3}. Best is trial 62 with value: 0.5050505050505051.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4391,2.217815,0.267677
2,1.8501,1.776851,0.434343


[I 2024-08-28 02:06:40,343] Trial 177 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4656,2.287142,0.29798


[I 2024-08-28 02:07:07,339] Trial 178 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 02:07:20,121] Trial 179 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4304,2.184899,0.262626
2,1.8317,1.810885,0.434343
3,1.1596,1.777369,0.484848


[I 2024-08-28 02:08:09,240] Trial 180 finished with value: 0.48484848484848486 and parameters: {'learning_rate': 3.860685466513129e-05, 'weight_decay': 0.09815418152246438, 'per_device_train_batch_size': 8, 'warmup_steps': 52, 'warmup_ratio': 0.1798663759557821, 'num_train_epochs': 3}. Best is trial 62 with value: 0.5050505050505051.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4554,2.229482,0.262626


[I 2024-08-28 02:08:41,541] Trial 181 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.3839,2.21879,0.267677
2,1.7926,1.850276,0.434343


[I 2024-08-28 02:09:29,639] Trial 182 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 02:09:46,449] Trial 183 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.41,2.172661,0.272727
2,1.8119,1.817061,0.429293
3,1.1585,1.780535,0.484848


[I 2024-08-28 02:10:35,597] Trial 184 finished with value: 0.48484848484848486 and parameters: {'learning_rate': 3.809796765575429e-05, 'weight_decay': 0.09043012047375919, 'per_device_train_batch_size': 8, 'warmup_steps': 40, 'warmup_ratio': 0.18635818798943615, 'num_train_epochs': 3}. Best is trial 62 with value: 0.5050505050505051.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4369,2.221121,0.262626


[I 2024-08-28 02:11:07,997] Trial 185 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.3689,2.216134,0.277778
2,1.802,1.874327,0.429293


[I 2024-08-28 02:11:55,974] Trial 186 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4481,2.247226,0.262626


[I 2024-08-28 02:12:28,370] Trial 187 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.3973,2.233278,0.277778
2,1.8472,1.874544,0.439394


[I 2024-08-28 02:13:16,338] Trial 188 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4834,2.276484,0.267677


[I 2024-08-28 02:13:48,866] Trial 189 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.3379,2.194466,0.292929
2,1.7591,1.870943,0.419192


[I 2024-08-28 02:14:37,015] Trial 190 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4803,2.267519,0.262626
2,1.8726,1.818995,0.439394


[I 2024-08-28 02:15:25,033] Trial 191 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 02:15:42,242] Trial 192 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.3985,2.159841,0.272727
2,1.7582,1.751156,0.449495
3,1.0424,1.860543,0.474747
4,0.6015,1.860334,0.484848


[I 2024-08-28 02:16:46,885] Trial 193 finished with value: 0.48484848484848486 and parameters: {'learning_rate': 4.374958719077278e-05, 'weight_decay': 0.08920653227807229, 'per_device_train_batch_size': 8, 'warmup_steps': 40, 'warmup_ratio': 0.1793643253315702, 'num_train_epochs': 4}. Best is trial 62 with value: 0.5050505050505051.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4306,2.217688,0.272727


[I 2024-08-28 02:17:19,368] Trial 194 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4202,2.238738,0.267677


[I 2024-08-28 02:17:51,891] Trial 195 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4301,2.262186,0.267677


[I 2024-08-28 02:18:24,376] Trial 196 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.3869,2.23812,0.277778


[I 2024-08-28 02:18:56,865] Trial 197 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.5438,2.316024,0.277778


[I 2024-08-28 02:19:23,895] Trial 198 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 02:19:40,755] Trial 199 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 02:19:53,075] Trial 200 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.455,2.204957,0.262626
2,1.845,1.771816,0.424242


[I 2024-08-28 02:20:41,361] Trial 201 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4055,2.198085,0.267677
2,1.7512,1.780903,0.454545


[I 2024-08-28 02:21:29,363] Trial 202 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 02:21:46,286] Trial 203 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.5928,2.306428,0.272727


[I 2024-08-28 02:22:18,961] Trial 204 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.409,2.232254,0.267677
2,1.7789,1.799411,0.424242


[I 2024-08-28 02:23:07,062] Trial 205 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.5224,2.292305,0.262626


[I 2024-08-28 02:23:39,504] Trial 206 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 02:23:56,506] Trial 207 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4429,2.195932,0.262626
2,1.8443,1.783807,0.439394


[I 2024-08-28 02:24:44,917] Trial 208 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.5539,2.31845,0.272727


[I 2024-08-28 02:25:17,356] Trial 209 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.3974,2.225564,0.267677


[I 2024-08-28 02:25:50,062] Trial 210 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4344,2.244087,0.262626
2,1.8057,1.783806,0.424242


[I 2024-08-28 02:26:38,113] Trial 211 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4897,2.269743,0.262626
2,1.8822,1.813104,0.419192


[I 2024-08-28 02:27:26,192] Trial 212 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 02:27:43,517] Trial 213 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 02:27:55,069] Trial 214 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4088,2.166183,0.267677
2,1.7707,1.770041,0.444444
3,1.08,1.773134,0.484848


[I 2024-08-28 02:28:44,375] Trial 215 finished with value: 0.48484848484848486 and parameters: {'learning_rate': 4.5009589949269485e-05, 'weight_decay': 0.0871107102046939, 'per_device_train_batch_size': 8, 'warmup_steps': 47, 'warmup_ratio': 0.18285287879768497, 'num_train_epochs': 3}. Best is trial 62 with value: 0.5050505050505051.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.3986,2.202525,0.272727


[I 2024-08-28 02:29:16,822] Trial 216 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4086,2.233122,0.262626


[I 2024-08-28 02:29:49,263] Trial 217 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4114,2.234239,0.262626
2,1.788,1.806662,0.419192


[I 2024-08-28 02:30:37,304] Trial 218 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 02:30:50,196] Trial 219 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.5442,2.286872,0.277778


[I 2024-08-28 02:31:17,286] Trial 220 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 02:31:34,141] Trial 221 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4182,2.174365,0.262626
2,1.786,1.776337,0.439394
3,1.0865,1.769377,0.494949


[I 2024-08-28 02:32:23,121] Trial 222 finished with value: 0.494949494949495 and parameters: {'learning_rate': 4.5220965109830694e-05, 'weight_decay': 0.08788604766456933, 'per_device_train_batch_size': 8, 'warmup_steps': 53, 'warmup_ratio': 0.15090878319402623, 'num_train_epochs': 3}. Best is trial 62 with value: 0.5050505050505051.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4516,2.22655,0.267677


[I 2024-08-28 02:32:55,438] Trial 223 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4094,2.233464,0.262626


[I 2024-08-28 02:33:27,940] Trial 224 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.5582,2.334008,0.267677


[I 2024-08-28 02:34:00,374] Trial 225 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.3465,2.184925,0.287879
2,1.7331,1.847217,0.424242


[I 2024-08-28 02:34:48,388] Trial 226 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4648,2.254112,0.262626
2,1.8428,1.812253,0.424242


[I 2024-08-28 02:35:36,228] Trial 227 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 02:35:53,344] Trial 228 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4593,2.208193,0.262626
2,1.8572,1.779863,0.444444
3,1.1377,1.768572,0.489899


[I 2024-08-28 02:36:42,173] Trial 229 finished with value: 0.4898989898989899 and parameters: {'learning_rate': 4.187524985874306e-05, 'weight_decay': 0.08297366619653318, 'per_device_train_batch_size': 8, 'warmup_steps': 75, 'warmup_ratio': 0.01208228492152437, 'num_train_epochs': 3}. Best is trial 62 with value: 0.5050505050505051.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4918,2.247646,0.267677
2,1.9279,1.845573,0.434343
3,1.2512,1.801444,0.5


[I 2024-08-28 02:37:31,219] Trial 230 finished with value: 0.5 and parameters: {'learning_rate': 4.067471095668069e-05, 'weight_decay': 0.08350283508629808, 'per_device_train_batch_size': 8, 'warmup_steps': 73, 'warmup_ratio': 0.19912896180227488, 'num_train_epochs': 3}. Best is trial 62 with value: 0.5050505050505051.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4907,2.247345,0.267677
2,1.9275,1.847713,0.434343
3,1.2529,1.802968,0.5


[I 2024-08-28 02:38:20,196] Trial 231 finished with value: 0.5 and parameters: {'learning_rate': 4.0498249528734824e-05, 'weight_decay': 0.08130741716562584, 'per_device_train_batch_size': 8, 'warmup_steps': 72, 'warmup_ratio': 0.05390705219050414, 'num_train_epochs': 3}. Best is trial 62 with value: 0.5050505050505051.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4975,2.250791,0.267677


[I 2024-08-28 02:38:52,518] Trial 232 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 02:39:09,351] Trial 233 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4479,2.199515,0.262626
2,1.8445,1.788528,0.444444
3,1.1395,1.770131,0.494949


[I 2024-08-28 02:39:58,417] Trial 234 finished with value: 0.494949494949495 and parameters: {'learning_rate': 4.102005197803768e-05, 'weight_decay': 0.08243216104166662, 'per_device_train_batch_size': 8, 'warmup_steps': 66, 'warmup_ratio': 0.049251314176858, 'num_train_epochs': 3}. Best is trial 62 with value: 0.5050505050505051.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4904,2.247979,0.262626


[I 2024-08-28 02:40:30,818] Trial 235 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4809,2.267396,0.267677


[I 2024-08-28 02:41:03,140] Trial 236 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4759,2.265303,0.262626


[I 2024-08-28 02:41:35,600] Trial 237 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 02:41:52,659] Trial 238 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4379,2.190556,0.262626
2,1.8329,1.796194,0.429293
3,1.1404,1.773654,0.484848


[I 2024-08-28 02:42:41,596] Trial 239 finished with value: 0.48484848484848486 and parameters: {'learning_rate': 4.039053722016228e-05, 'weight_decay': 0.08171562128719523, 'per_device_train_batch_size': 8, 'warmup_steps': 59, 'warmup_ratio': 0.051078486016145744, 'num_train_epochs': 3}. Best is trial 62 with value: 0.5050505050505051.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4902,2.24622,0.267677
2,1.9203,1.835029,0.429293
3,1.2387,1.7946,0.494949


[I 2024-08-28 02:43:30,462] Trial 240 finished with value: 0.494949494949495 and parameters: {'learning_rate': 4.2004371296753054e-05, 'weight_decay': 0.08453844326454675, 'per_device_train_batch_size': 8, 'warmup_steps': 74, 'warmup_ratio': 0.055080419239507974, 'num_train_epochs': 3}. Best is trial 62 with value: 0.5050505050505051.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4877,2.245552,0.267677
2,1.9175,1.837393,0.434343
3,1.2408,1.794671,0.5


[I 2024-08-28 02:44:19,366] Trial 241 finished with value: 0.5 and parameters: {'learning_rate': 4.180253623762688e-05, 'weight_decay': 0.08396389852748257, 'per_device_train_batch_size': 8, 'warmup_steps': 72, 'warmup_ratio': 0.05066962504363319, 'num_train_epochs': 3}. Best is trial 62 with value: 0.5050505050505051.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.5042,2.249712,0.262626


[I 2024-08-28 02:44:51,869] Trial 242 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4572,2.254616,0.262626


[I 2024-08-28 02:45:24,709] Trial 243 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4421,2.247855,0.267677


[I 2024-08-28 02:45:57,030] Trial 244 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 02:46:13,906] Trial 245 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 02:46:30,936] Trial 246 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.5656,2.296821,0.277778


[I 2024-08-28 02:47:03,275] Trial 247 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 02:47:15,670] Trial 248 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.5851,2.302371,0.277778


[I 2024-08-28 02:47:42,593] Trial 249 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4663,2.255352,0.262626


[I 2024-08-28 02:48:15,200] Trial 250 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4471,2.249847,0.267677


[I 2024-08-28 02:48:47,517] Trial 251 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.5158,2.310793,0.262626


[I 2024-08-28 02:49:19,910] Trial 252 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4331,2.244592,0.262626


[I 2024-08-28 02:49:52,177] Trial 253 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4912,2.28359,0.262626


[I 2024-08-28 02:50:24,600] Trial 254 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 02:50:36,400] Trial 255 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.5144,2.257093,0.282828


[I 2024-08-28 02:51:09,055] Trial 256 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.5907,2.339841,0.267677


[I 2024-08-28 02:51:41,435] Trial 257 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4877,2.277845,0.262626


[I 2024-08-28 02:52:13,909] Trial 258 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.426,2.24042,0.262626


[I 2024-08-28 02:52:46,295] Trial 259 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 02:52:59,040] Trial 260 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4663,2.211704,0.262626


[I 2024-08-28 02:53:31,488] Trial 261 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4493,2.250934,0.267677
2,1.8673,1.833858,0.429293


[I 2024-08-28 02:54:19,208] Trial 262 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 02:54:33,322] Trial 263 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4198,2.176071,0.262626
2,1.8012,1.790561,0.439394
3,1.1155,1.772582,0.489899


[I 2024-08-28 02:55:22,443] Trial 264 finished with value: 0.4898989898989899 and parameters: {'learning_rate': 4.193020177993349e-05, 'weight_decay': 0.04671190497635283, 'per_device_train_batch_size': 8, 'warmup_steps': 50, 'warmup_ratio': 0.02856182851514855, 'num_train_epochs': 3}. Best is trial 62 with value: 0.5050505050505051.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4598,2.230912,0.262626


[I 2024-08-28 02:55:55,159] Trial 265 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4592,2.252316,0.262626


[I 2024-08-28 02:56:27,454] Trial 266 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 02:56:44,252] Trial 267 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.6006,2.308226,0.272727


[I 2024-08-28 02:57:16,730] Trial 268 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4166,2.23748,0.267677


[I 2024-08-28 02:57:49,035] Trial 269 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 02:58:05,884] Trial 270 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4736,2.217571,0.262626


[I 2024-08-28 02:58:38,233] Trial 271 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4114,2.23437,0.262626


[I 2024-08-28 02:59:10,712] Trial 272 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 02:59:27,776] Trial 273 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 02:59:40,103] Trial 274 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.5634,2.295781,0.277778


[I 2024-08-28 03:00:12,496] Trial 275 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.5008,2.295276,0.262626


[I 2024-08-28 03:00:44,881] Trial 276 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4583,2.280099,0.282828


[I 2024-08-28 03:01:17,264] Trial 277 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 03:01:34,069] Trial 278 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 03:01:48,157] Trial 279 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4392,2.195466,0.262626
2,1.8141,1.763506,0.439394
3,1.0814,1.763017,0.479798


[I 2024-08-28 03:02:37,480] Trial 280 finished with value: 0.4797979797979798 and parameters: {'learning_rate': 4.7684296186642696e-05, 'weight_decay': 0.0527662871651778, 'per_device_train_batch_size': 8, 'warmup_steps': 69, 'warmup_ratio': 0.11155352760334575, 'num_train_epochs': 3}. Best is trial 62 with value: 0.5050505050505051.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 03:02:54,278] Trial 281 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 03:03:06,092] Trial 282 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4622,2.20929,0.262626
2,1.8573,1.775658,0.439394
3,1.1217,1.77421,0.489899


[I 2024-08-28 03:03:55,612] Trial 283 finished with value: 0.4898989898989899 and parameters: {'learning_rate': 4.4184958873011735e-05, 'weight_decay': 0.07980457087191732, 'per_device_train_batch_size': 8, 'warmup_steps': 81, 'warmup_ratio': 0.05920040379207493, 'num_train_epochs': 3}. Best is trial 62 with value: 0.5050505050505051.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.5075,2.247677,0.262626
2,1.9418,1.825184,0.439394
3,1.2248,1.7903,0.494949


[I 2024-08-28 03:04:44,475] Trial 284 finished with value: 0.494949494949495 and parameters: {'learning_rate': 4.40362015940511e-05, 'weight_decay': 0.07850444438426288, 'per_device_train_batch_size': 8, 'warmup_steps': 91, 'warmup_ratio': 0.06176090548899181, 'num_train_epochs': 3}. Best is trial 62 with value: 0.5050505050505051.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.5923,2.314517,0.272727


[I 2024-08-28 03:05:16,814] Trial 285 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4624,2.247006,0.267677


[I 2024-08-28 03:05:49,258] Trial 286 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 03:06:02,018] Trial 287 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.466,2.211581,0.262626
2,1.8627,1.776196,0.439394
3,1.1264,1.769583,0.484848


[I 2024-08-28 03:06:51,293] Trial 288 finished with value: 0.48484848484848486 and parameters: {'learning_rate': 4.3513451204743966e-05, 'weight_decay': 0.0787451196007172, 'per_device_train_batch_size': 8, 'warmup_steps': 83, 'warmup_ratio': 0.054722628010379505, 'num_train_epochs': 3}. Best is trial 62 with value: 0.5050505050505051.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 03:07:08,246] Trial 289 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 03:07:25,032] Trial 290 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.459,2.20807,0.262626
2,1.8549,1.775821,0.444444
3,1.1299,1.768401,0.489899


[I 2024-08-28 03:08:13,939] Trial 291 finished with value: 0.4898989898989899 and parameters: {'learning_rate': 4.265945708367652e-05, 'weight_decay': 0.0559266706213934, 'per_device_train_batch_size': 8, 'warmup_steps': 76, 'warmup_ratio': 0.047276941297190164, 'num_train_epochs': 3}. Best is trial 62 with value: 0.5050505050505051.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4988,2.23742,0.262626
2,1.9133,1.803974,0.444444
3,1.1869,1.778883,0.510101


[I 2024-08-28 03:09:02,721] Trial 292 finished with value: 0.51010101010101 and parameters: {'learning_rate': 4.867915819336374e-05, 'weight_decay': 0.056421880826897274, 'per_device_train_batch_size': 8, 'warmup_steps': 92, 'warmup_ratio': 0.04691041160072529, 'num_train_epochs': 3}. Best is trial 292 with value: 0.51010101010101.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 03:09:17,112] Trial 293 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


[I 2024-08-28 03:09:33,914] Trial 294 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4511,2.203412,0.262626
2,1.8326,1.762285,0.434343


[I 2024-08-28 03:10:21,864] Trial 295 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4738,2.256245,0.262626
2,1.8421,1.795011,0.429293


[I 2024-08-28 03:11:09,542] Trial 296 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.5086,2.280931,0.262626


[I 2024-08-28 03:11:41,915] Trial 297 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4526,2.243027,0.262626


[I 2024-08-28 03:12:14,208] Trial 298 pruned. 
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,2.4402,2.247483,0.262626


[I 2024-08-28 03:12:46,600] Trial 299 pruned. 


Best Hyperparameters:
 {'learning_rate': 4.867915819336374e-05, 'weight_decay': 0.056421880826897274, 'per_device_train_batch_size': 8, 'warmup_steps': 92, 'warmup_ratio': 0.04691041160072529, 'num_train_epochs': 3}


In [7]:
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=20)
best_params = {'learning_rate': 4.867915819336374e-05, 'weight_decay': 0.056421880826897274, 'per_device_train_batch_size': 8, 'warmup_steps': 92, 'warmup_ratio': 0.04691041160072529, 'num_train_epochs': 3}
training_args = TrainingArguments(
    output_dir="output",
    learning_rate=best_params['learning_rate'],
    weight_decay=best_params['weight_decay'],
    per_device_train_batch_size=best_params['per_device_train_batch_size'],
    warmup_steps=best_params['warmup_steps'],
    warmup_ratio=best_params['warmup_ratio'],
    num_train_epochs=best_params['num_train_epochs'],
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="epoch",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=test_ds,
    compute_metrics=compute_metrics,
)

trainer.train()

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


KeyboardInterrupt: 

In [None]:
def load_model(save=False):
    if save:
        trainer.save_model('./trained_model')
    return AutoModelForSequenceClassification.from_pretrained('./trained_model')

trained_model = load_model(save=True)
trained_model.evaluate()


  0%|          | 0/13 [00:00<?, ?it/s]

{'eval_loss': 2.4165635108947754, 'eval_accuracy': 0.3465346534653465, 'eval_runtime': 4.9265, 'eval_samples_per_second': 20.502, 'eval_steps_per_second': 2.639, 'epoch': 4.0}


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(52000, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [None]:
sentence = "קובי נסע לכיכר הרצל בלוד"
# sentence = "זרקו שם מטען חבלה"
input = tokenizer(sentence, padding=True, truncation=True, return_tensors='pt')
output = trained_model(**input)
print(output)
pred = softmax(output.logits, dim=-1).detach().numpy()
print(pred)
print(np.argmax(pred) + 1)

SequenceClassifierOutput(loss=None, logits=tensor([[ 0.1365, -0.1568, -0.4376,  0.1788, -0.5550, -0.3552, -0.6783,  0.0173,
         -1.0358,  0.6358, -0.6998, -1.8162, -0.4686,  2.8654,  4.7651,  0.1125,
         -0.6668, -1.7059,  0.1414, -0.9894]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)
[[0.00772378 0.00575987 0.00435009 0.00805722 0.00386803 0.0047237
  0.00341949 0.00685559 0.00239168 0.01272504 0.00334669 0.00109589
  0.00421709 0.11828709 0.79068893 0.00754044 0.00345895 0.00122368
  0.00776161 0.00250517]]
15
