# Clintox LoRA Finetuning

Loading Dataset

In [7]:
import pandas as pd

train_clin=pd.read_csv('/home/raghvendra2/Molformer_Finetuning/clintox_train.csv')
val_clin=pd.read_csv('/home/raghvendra2/Molformer_Finetuning/clintox_valid.csv')

In [2]:
clin_sub=train_clin.drop(['FDA_APPROVED','smiles'],axis=1)

Load tokenizer and Classsification Model

In [8]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Load the tokenizer
tokenizer_clin = AutoTokenizer.from_pretrained(
    "ibm/MoLFormer-XL-both-10pct",
    trust_remote_code=True
)

# Load the model with a classification head
model_clin = AutoModelForSequenceClassification.from_pretrained(
    "ibm/MoLFormer-XL-both-10pct",
    num_labels=2,
    problem_type="single_label_classification",    
    trust_remote_code=True
)

Some weights of MolformerForSequenceClassification were not initialized from the model checkpoint at ibm/MoLFormer-XL-both-10pct and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.dense2.bias', 'classifier.dense2.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Preparing Training and Validation Dataset

In [9]:
smiles_list_clin = train_clin['smiles'].tolist()
smiles_val_clin=val_clin['smiles'].tolist()
train_tokenized_clin=tokenizer_clin(smiles_list_clin)
val_tokenized_clin=tokenizer_clin(smiles_val_clin)

In [10]:
from datasets import Dataset
train_dataset_clin = Dataset.from_dict(train_tokenized_clin)
val_dataset_clin = Dataset.from_dict(val_tokenized_clin)

In [11]:
train_labels_clin = train_clin['CT_TOX'].tolist() # Assuming tasks start from column 1
val_labels_clin = val_clin['CT_TOX'].tolist()

In [12]:
train_dataset_clin = train_dataset_clin.add_column("labels", train_labels_clin)
val_dataset_clin = val_dataset_clin.add_column("labels", val_labels_clin)

LoRA Finetuning

In [2]:
!pip install tf-keras

Collecting tf-keras
  Downloading tf_keras-2.19.0-py3-none-any.whl.metadata (1.8 kB)
Downloading tf_keras-2.19.0-py3-none-any.whl (1.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m13.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tf-keras
Successfully installed tf-keras-2.19.0
Note: you may need to restart the kernel to use updated packages.


In [15]:
import wandb
from transformers import Trainer, TrainingArguments, EarlyStoppingCallback
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from evaluate import load
from datasets import Dataset
import numpy as np
import pandas as pd
import os
from scipy.special import softmax
from sklearn.metrics import roc_auc_score, precision_score, recall_score, f1_score,matthews_corrcoef

In [4]:
def data_load():
    train_clin=pd.read_csv('/home/raghvendra2/Molformer_Finetuning/clintox_train.csv')
    val_clin=pd.read_csv('/home/raghvendra2/Molformer_Finetuning/clintox_test.csv')

    return train_clin, val_clin

In [5]:
def data_prep(data_process,tokenizer_clin):

    smiles_list_clin = data_process['smiles'].tolist()
    tokenized_clin=tokenizer_clin(smiles_list_clin)
    
    
    dataset_clin = Dataset.from_dict(tokenized_clin)
    

    labels_clin = data_process['CT_TOX'].tolist() # Assuming tasks start from column 1
    
    dataset_clin = dataset_clin.add_column("labels", labels_clin)
    

    return dataset_clin

In [6]:
from peft import LoraConfig, get_peft_model, PeftModel

def lora_config(r,lora_alpha,dropout):

    lora_config = LoraConfig(
        task_type="SEQ_CLS",  # Sequence classification task
        r=r,  # Rank of LoRA matrices
        lora_alpha=lora_alpha,  # Scaling factor double of rank( from the rule of thumb)
        target_modules='all-linear',
        lora_dropout=dropout  # Dropout rate
        #init_lora_weights="gaussian"
    )

    return lora_config

In [13]:
import torch

class_weights= [1-(train_dataset_clin['labels'].count(0)/len(train_dataset_clin['labels'])),
                           1-(train_dataset_clin['labels'].count(1)/len(train_dataset_clin['labels']))]

class_weights = torch.from_numpy(np.array(class_weights)).float().to("cuda")

class WeightedLossTrainer(Trainer):

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):

        outputs = model(**inputs)
        logits = outputs.get("logits")

        # Extract labels
        labels = inputs.get("labels")

        # compute custom loss (suppose one has 2 labels with different weights)
        loss_func = torch.nn.CrossEntropyLoss(weight=class_weights)

        # compute loss
        loss = loss_func(logits.view(-1, self.model.config.num_labels), labels.view(-1))
        return (loss, outputs) if return_outputs else loss

In [14]:
from evaluate import load
import numpy as np
from scipy.special import softmax
from sklearn.metrics import roc_auc_score, precision_score, recall_score, f1_score,matthews_corrcoef

accuracy_metric = load("accuracy")
mcc_metric= load("matthews_correlation")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    
    probabilities = softmax(logits, axis=1)[:, 1]  # Get probabilities for class 1
    predictions = np.argmax(logits, axis=1)  # Choose the most likely class
    

    mcc = matthews_corrcoef(labels, predictions)

    return {
        "eval_mcc_metric": mcc,
        "Accuracy": accuracy_metric.compute(predictions=predictions, references=labels)["accuracy"],
        "AUC-ROC": roc_auc_score(labels, probabilities),  # AUC-ROC requires probabilities
        "Precision": precision_score(labels, predictions),
        "Recall": recall_score(labels, predictions),
        "F1-score": f1_score(labels, predictions)
    } 



In [16]:

# Initialize W&B with sweep
def run_training():
    run = wandb.init(project="Clintox Hyperparameter Tuning")
    config = run.config   

    # Define unique save path for each W&B run
    save_dir = f"./models_molformer_clintox/{wandb.run.id}"  # Unique directory for each run
    os.makedirs(save_dir, exist_ok=True)

    tokenizer_clin = AutoTokenizer.from_pretrained(
        "ibm/MoLFormer-XL-both-10pct",
        trust_remote_code=True
    )

    # Load data
    train_data, val_data = data_load()
    training_data = data_prep(train_data, tokenizer_clin)
    validation_data = data_prep(val_data, tokenizer_clin)

    # Load base model
    model_clin = AutoModelForSequenceClassification.from_pretrained(
        "ibm/MoLFormer-XL-both-10pct",
        num_labels=2,
        problem_type="single_label_classification",    
        trust_remote_code=True
    )

    # Apply LoRA
    peft_config = lora_config(config.r, config.lora_alpha, config.dropout)
    lora_model = get_peft_model(model_clin, peft_config)
    
    lora_model.print_trainable_parameters()

    # Training arguments
    training_args = TrainingArguments(
        output_dir=save_dir,
        evaluation_strategy="epoch",
        learning_rate=config.lr,
        per_device_train_batch_size=32,
        per_device_eval_batch_size=32,
        num_train_epochs=20,
        weight_decay=0.01,
        save_strategy="epoch",  # Save model at each epoch
        logging_dir=f"./logs_clin/{wandb.run.id}",
        logging_strategy="steps",
        logging_steps=100,
        report_to="wandb",
        save_total_limit=5,
        load_best_model_at_end=True,
        metric_for_best_model="eval_mcc_metric"
    )

    accuracy_metric = load("accuracy")

    def compute_metrics(eval_pred):
        logits, labels = eval_pred
        probabilities = softmax(logits, axis=1)[:, 1]  
        predictions = np.argmax(logits, axis=1)  
        mcc = matthews_corrcoef(labels, predictions)

        return {
            "eval_mcc_metric": mcc,
            "Accuracy": accuracy_metric.compute(predictions=predictions, references=labels)["accuracy"],
            "AUC-ROC": roc_auc_score(labels, probabilities),
            "Precision": precision_score(labels, predictions),
            "Recall": recall_score(labels, predictions),
            "F1-score": f1_score(labels, predictions)
        }

    # Initialize trainer
    trainer = WeightedLossTrainer(
        model=lora_model,
        args=training_args,
        train_dataset=training_data,
        eval_dataset=validation_data,
        tokenizer=tokenizer_clin,
        compute_metrics=compute_metrics,
        callbacks=[EarlyStoppingCallback(early_stopping_patience=5)]
    )

    # Train model
    trainer.train()

    # Save model and tokenizer for this run
    trainer.save_model(save_dir)
    tokenizer_clin.save_pretrained(save_dir)
    
    print(f"Model saved to {save_dir}")
    
    wandb.finish()


In [17]:
 # Define the sweep configuration
def main():

    sweep_config = {
    "name": "Clintox Hyperparameter Tuning",
    "method": "bayes",
    "metric": {
        "goal": "maximize",
        "name": "eval/mcc_metric"
        },
    "parameters": {

        "lr": {

            "distribution": "uniform",
            "min": 1e-5,
            "max": 2e-5
        },
        "r": {
            "values": [4, 8, 16, 32,64]
        },
        "lora_alpha": {
            "values": [8, 16, 32, 64,128]
        },
        "dropout": {
            "values": [0.0, 0.1, 0.2]
        }
    }
    }
    sweep_id = wandb.sweep(sweep_config, project="huggingface")
    wandb.agent(sweep_id, function=run_training, count=10)

    api = wandb.Api()
    sweep = api.sweep(f"huggingface/{sweep_id}")
    print(sweep.runs[0].summary_metrics)

    runs_with_eval_loss = [run for run in sweep.runs if 'eval/mcc_metric' in run.summary_metrics]

    if runs_with_eval_loss:
        best_run = sorted(runs_with_eval_loss, key=lambda run: run.summary_metrics['eval/mcc_metric'],reverse=False)[0]
    else:
        raise ValueError("No runs found with 'eval/mcc_metric' metric.")

    best_hyperparameters = best_run.config
    print(best_hyperparameters)

    

if __name__ == "__main__":
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    main()

Create sweep with ID: ambvlx5s
Sweep URL: https://wandb.ai/harodharsha21-iit-ropar/huggingface/sweeps/ambvlx5s


[34m[1mwandb[0m: Agent Starting Run: kbo8a99q with config:
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	lora_alpha: 64
[34m[1mwandb[0m: 	lr: 1.6312099053672378e-05
[34m[1mwandb[0m: 	r: 64
[34m[1mwandb[0m: Currently logged in as: [33mharodharsha21[0m ([33mharodharsha21-iit-ropar[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Some weights of MolformerForSequenceClassification were not initialized from the model checkpoint at ibm/MoLFormer-XL-both-10pct and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.dense2.bias', 'classifier.dense2.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 8,506,498 || all params: 54,310,148 || trainable%: 15.6628


  trainer = WeightedLossTrainer(


[2025-04-10 08:02:43,126] [INFO] [real_accelerator.py:239:get_accelerator] Setting ds_accelerator to cuda (auto detect)


/storage/qnap_home/raghvendra2/micromamba/envs/Molformer/compiler_compat/ld: cannot find -laio: No such file or directory
collect2: error: ld returned 1 exit status
/storage/qnap_home/raghvendra2/micromamba/envs/Molformer/compiler_compat/ld: cannot find -lcufile: No such file or directory
collect2: error: ld returned 1 exit status
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss,Mcc Metric,Accuracy,Auc-roc,Precision,Recall,F1-score
1,No log,0.459713,0.704154,0.923077,0.975148,0.541667,1.0,0.702703
2,No log,0.269452,0.81376,0.965035,0.991716,0.75,0.923077,0.827586
3,0.424100,0.167435,0.915385,0.986014,0.991716,0.923077,0.923077,0.923077
4,0.424100,0.136626,0.912871,0.986014,0.995858,1.0,0.846154,0.916667
5,0.424100,0.154945,0.912871,0.986014,0.992899,1.0,0.846154,0.916667
6,0.116200,0.182133,0.912871,0.986014,0.994083,1.0,0.846154,0.916667
7,0.116200,0.239986,0.912871,0.986014,0.992308,1.0,0.846154,0.916667
8,0.091200,0.279319,0.912871,0.986014,0.989349,1.0,0.846154,0.916667


Model saved to ./models_molformer_clintox/kbo8a99q


0,1
eval/AUC-ROC,▁▇▇█▇▇▇▆
eval/Accuracy,▁▆██████
eval/F1-score,▁▅██████
eval/Precision,▁▄▇█████
eval/Recall,█▅▅▁▁▁▁▁
eval/loss,█▄▂▁▁▂▃▄
eval/mcc_metric,▁▅██████
eval/runtime,█▂▁▁▂▂▃▁
eval/samples_per_second,▁▇██▇▇▅█
eval/steps_per_second,▁▇██▇▇▅█

0,1
eval/AUC-ROC,0.98935
eval/Accuracy,0.98601
eval/F1-score,0.91667
eval/Precision,1.0
eval/Recall,0.84615
eval/loss,0.27932
eval/mcc_metric,0.91287
eval/runtime,0.4258
eval/samples_per_second,335.865
eval/steps_per_second,11.744


[34m[1mwandb[0m: Agent Starting Run: lm8vser5 with config:
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	lora_alpha: 16
[34m[1mwandb[0m: 	lr: 1.0421017672822343e-05
[34m[1mwandb[0m: 	r: 4


Some weights of MolformerForSequenceClassification were not initialized from the model checkpoint at ibm/MoLFormer-XL-both-10pct and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.dense2.bias', 'classifier.dense2.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 1,640,458 || all params: 47,213,588 || trainable%: 3.4745


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss,Mcc Metric,Accuracy,Auc-roc,Precision,Recall,F1-score
1,No log,0.524808,0.656103,0.916084,0.956213,0.521739,0.923077,0.666667
2,No log,0.399298,0.957095,0.993007,0.998817,1.0,0.923077,0.96
3,0.504600,0.333783,0.915385,0.986014,0.997633,0.923077,0.923077,0.923077
4,0.504600,0.271847,0.957095,0.993007,1.0,1.0,0.923077,0.96
5,0.504600,0.239523,0.912871,0.986014,0.997633,1.0,0.846154,0.916667
6,0.281200,0.211013,0.912871,0.986014,0.998817,1.0,0.846154,0.916667
7,0.281200,0.17412,0.912871,0.986014,0.998817,1.0,0.846154,0.916667


Model saved to ./models_molformer_clintox/lm8vser5


0,1
eval/AUC-ROC,▁██████
eval/Accuracy,▁█▇█▇▇▇
eval/F1-score,▁█▇█▇▇▇
eval/Precision,▁█▇████
eval/Recall,████▁▁▁
eval/loss,█▅▄▃▂▂▁
eval/mcc_metric,▁█▇█▇▇▇
eval/runtime,▆▄█▄▁▆▁
eval/samples_per_second,▃▅▁▅█▃█
eval/steps_per_second,▃▅▁▅█▃█

0,1
eval/AUC-ROC,0.99882
eval/Accuracy,0.98601
eval/F1-score,0.91667
eval/Precision,1.0
eval/Recall,0.84615
eval/loss,0.17412
eval/mcc_metric,0.91287
eval/runtime,0.3994
eval/samples_per_second,358.062
eval/steps_per_second,12.52


[34m[1mwandb[0m: Agent Starting Run: o1vbzyf5 with config:
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	lora_alpha: 32
[34m[1mwandb[0m: 	lr: 1.5261371145863336e-05
[34m[1mwandb[0m: 	r: 32


Some weights of MolformerForSequenceClassification were not initialized from the model checkpoint at ibm/MoLFormer-XL-both-10pct and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.dense2.bias', 'classifier.dense2.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 4,844,610 || all params: 50,525,316 || trainable%: 9.5885


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss,Mcc Metric,Accuracy,Auc-roc,Precision,Recall,F1-score
1,No log,0.472708,0.643952,0.923077,0.954438,0.55,0.846154,0.666667
2,No log,0.325774,0.915385,0.986014,0.987574,0.923077,0.923077,0.923077
3,0.446300,0.22362,0.915385,0.986014,0.991124,0.923077,0.923077,0.923077
4,0.446300,0.148105,0.915385,0.986014,0.995858,0.923077,0.923077,0.923077
5,0.446300,0.160287,0.912871,0.986014,0.994083,1.0,0.846154,0.916667
6,0.163700,0.17084,0.912871,0.986014,0.995266,1.0,0.846154,0.916667
7,0.163700,0.145772,0.912871,0.986014,0.99645,1.0,0.846154,0.916667


Model saved to ./models_molformer_clintox/o1vbzyf5


0,1
eval/AUC-ROC,▁▇▇████
eval/Accuracy,▁██████
eval/F1-score,▁██████
eval/Precision,▁▇▇▇███
eval/Recall,▁███▁▁▁
eval/loss,█▅▃▁▁▂▁
eval/mcc_metric,▁██████
eval/runtime,▁▄▄▆▇█▇
eval/samples_per_second,█▅▅▃▂▁▂
eval/steps_per_second,█▅▅▃▂▁▂

0,1
eval/AUC-ROC,0.99645
eval/Accuracy,0.98601
eval/F1-score,0.91667
eval/Precision,1.0
eval/Recall,0.84615
eval/loss,0.14577
eval/mcc_metric,0.91287
eval/runtime,0.4148
eval/samples_per_second,344.716
eval/steps_per_second,12.053


[34m[1mwandb[0m: Agent Starting Run: 2diwcmfr with config:
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	lora_alpha: 8
[34m[1mwandb[0m: 	lr: 1.4895769470124064e-05
[34m[1mwandb[0m: 	r: 64


Some weights of MolformerForSequenceClassification were not initialized from the model checkpoint at ibm/MoLFormer-XL-both-10pct and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.dense2.bias', 'classifier.dense2.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 8,506,498 || all params: 54,310,148 || trainable%: 15.6628


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss,Mcc Metric,Accuracy,Auc-roc,Precision,Recall,F1-score
1,No log,0.499392,0.558934,0.888112,0.94142,0.44,0.846154,0.578947
2,No log,0.377013,0.844375,0.972028,0.985207,0.8,0.923077,0.857143
3,0.480300,0.291138,0.915385,0.986014,0.991124,0.923077,0.923077,0.923077
4,0.480300,0.213623,0.915385,0.986014,0.998225,0.923077,0.923077,0.923077
5,0.480300,0.1949,0.912871,0.986014,0.99645,1.0,0.846154,0.916667
6,0.228500,0.185717,0.912871,0.986014,0.994675,1.0,0.846154,0.916667
7,0.228500,0.148291,0.957095,0.993007,0.99645,1.0,0.923077,0.96
8,0.153600,0.146876,0.912871,0.986014,0.997041,1.0,0.846154,0.916667
9,0.153600,0.15382,0.957095,0.993007,0.992899,1.0,0.923077,0.96
10,0.153600,0.131516,0.957095,0.993007,0.99645,1.0,0.923077,0.96


Model saved to ./models_molformer_clintox/2diwcmfr


0,1
eval/AUC-ROC,▁▆▇█████▇███
eval/Accuracy,▁▇██████████
eval/F1-score,▁▆▇▇▇▇█▇██▇▇
eval/Precision,▁▆▇▇████████
eval/Recall,▁███▁▁█▁██▁▁
eval/loss,█▆▄▃▂▂▁▁▁▁▁▂
eval/mcc_metric,▁▆▇▇▇▇█▇██▇▇
eval/runtime,█▂▂▁▃▅▂▂▆▄▆▅
eval/samples_per_second,▁▇▇█▆▄▇▇▃▅▃▄
eval/steps_per_second,▁▇▇█▆▄▇▇▃▅▃▄

0,1
eval/AUC-ROC,0.99527
eval/Accuracy,0.98601
eval/F1-score,0.91667
eval/Precision,1.0
eval/Recall,0.84615
eval/loss,0.16669
eval/mcc_metric,0.91287
eval/runtime,0.4268
eval/samples_per_second,335.017
eval/steps_per_second,11.714


[34m[1mwandb[0m: Agent Starting Run: 3wzclq6q with config:
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	lora_alpha: 128
[34m[1mwandb[0m: 	lr: 1.4142564104975132e-05
[34m[1mwandb[0m: 	r: 8


Some weights of MolformerForSequenceClassification were not initialized from the model checkpoint at ibm/MoLFormer-XL-both-10pct and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.dense2.bias', 'classifier.dense2.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 2,098,194 || all params: 47,686,692 || trainable%: 4.4000


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss,Mcc Metric,Accuracy,Auc-roc,Precision,Recall,F1-score
1,No log,0.433198,0.81376,0.965035,0.963905,0.75,0.923077,0.827586
2,No log,0.247336,0.869361,0.979021,0.991124,0.916667,0.846154,0.88
3,0.368700,0.172291,0.912871,0.986014,0.994675,1.0,0.846154,0.916667
4,0.368700,0.211937,0.912871,0.986014,0.994083,1.0,0.846154,0.916667
5,0.368700,0.289031,0.912871,0.986014,0.988757,1.0,0.846154,0.916667
6,0.104900,0.312574,0.912871,0.986014,0.993491,1.0,0.846154,0.916667
7,0.104900,0.477216,0.912871,0.986014,0.986391,1.0,0.846154,0.916667
8,0.125000,0.569535,0.912871,0.986014,0.981657,1.0,0.846154,0.916667


Model saved to ./models_molformer_clintox/3wzclq6q


0,1
eval/AUC-ROC,▁▇██▇█▆▅
eval/Accuracy,▁▆██████
eval/F1-score,▁▅██████
eval/Precision,▁▆██████
eval/Recall,█▁▁▁▁▁▁▁
eval/loss,▆▂▁▂▃▃▆█
eval/mcc_metric,▁▅██████
eval/runtime,▁▁█▃█▇▄▇
eval/samples_per_second,██▁▆▁▂▅▂
eval/steps_per_second,██▁▆▁▂▅▂

0,1
eval/AUC-ROC,0.98166
eval/Accuracy,0.98601
eval/F1-score,0.91667
eval/Precision,1.0
eval/Recall,0.84615
eval/loss,0.56953
eval/mcc_metric,0.91287
eval/runtime,0.4123
eval/samples_per_second,346.809
eval/steps_per_second,12.126


[34m[1mwandb[0m: Agent Starting Run: jvf2j5zv with config:
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	lora_alpha: 16
[34m[1mwandb[0m: 	lr: 1.5141899736019033e-05
[34m[1mwandb[0m: 	r: 4


Some weights of MolformerForSequenceClassification were not initialized from the model checkpoint at ibm/MoLFormer-XL-both-10pct and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.dense2.bias', 'classifier.dense2.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 1,640,458 || all params: 47,213,588 || trainable%: 3.4745


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss,Mcc Metric,Accuracy,Auc-roc,Precision,Recall,F1-score
1,No log,0.457875,0.844375,0.972028,0.994675,0.8,0.923077,0.857143
2,No log,0.333438,0.957095,0.993007,0.997633,1.0,0.923077,0.96
3,0.449500,0.239397,0.915385,0.986014,0.999408,0.923077,0.923077,0.923077
4,0.449500,0.182941,0.912871,0.986014,1.0,1.0,0.846154,0.916667
5,0.449500,0.151454,0.957095,0.993007,0.998817,1.0,0.923077,0.96
6,0.191700,0.154435,0.912871,0.986014,1.0,1.0,0.846154,0.916667
7,0.191700,0.139757,0.912871,0.986014,0.999408,1.0,0.846154,0.916667


Model saved to ./models_molformer_clintox/jvf2j5zv


0,1
eval/AUC-ROC,▁▅▇█▆█▇
eval/Accuracy,▁█▆▆█▆▆
eval/F1-score,▁█▅▅█▅▅
eval/Precision,▁█▅████
eval/Recall,███▁█▁▁
eval/loss,█▅▃▂▁▁▁
eval/mcc_metric,▁█▅▅█▅▅
eval/runtime,▃▁▃▂▆▇█
eval/samples_per_second,▆█▆▇▃▂▁
eval/steps_per_second,▆█▆▇▃▂▁

0,1
eval/AUC-ROC,0.99941
eval/Accuracy,0.98601
eval/F1-score,0.91667
eval/Precision,1.0
eval/Recall,0.84615
eval/loss,0.13976
eval/mcc_metric,0.91287
eval/runtime,0.4105
eval/samples_per_second,348.335
eval/steps_per_second,12.18


[34m[1mwandb[0m: Agent Starting Run: gulowmgl with config:
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	lora_alpha: 8
[34m[1mwandb[0m: 	lr: 1.2023686914961584e-05
[34m[1mwandb[0m: 	r: 16


Some weights of MolformerForSequenceClassification were not initialized from the model checkpoint at ibm/MoLFormer-XL-both-10pct and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.dense2.bias', 'classifier.dense2.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 3,013,666 || all params: 48,632,900 || trainable%: 6.1968


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss,Mcc Metric,Accuracy,Auc-roc,Precision,Recall,F1-score
1,No log,0.529954,0.505984,0.86014,0.914793,0.37931,0.846154,0.52381
2,No log,0.423347,0.785757,0.958042,0.980473,0.705882,0.923077,0.8
3,0.512600,0.34178,0.878058,0.979021,0.988757,0.857143,0.923077,0.888889
4,0.512600,0.263308,0.915385,0.986014,0.998225,0.923077,0.923077,0.923077
5,0.512600,0.234602,0.915385,0.986014,0.995858,0.923077,0.923077,0.923077
6,0.277800,0.216456,0.957095,0.993007,0.994083,1.0,0.923077,0.96
7,0.277800,0.176623,0.957095,0.993007,0.997633,1.0,0.923077,0.96
8,0.185400,0.162396,0.912871,0.986014,0.997633,1.0,0.846154,0.916667
9,0.185400,0.165579,0.957095,0.993007,0.992899,1.0,0.923077,0.96
10,0.185400,0.14114,0.957095,0.993007,0.99645,1.0,0.923077,0.96


Model saved to ./models_molformer_clintox/gulowmgl


0,1
eval/AUC-ROC,▁▇▇████████
eval/Accuracy,▁▆▇████████
eval/F1-score,▁▅▇▇▇██▇██▇
eval/Precision,▁▅▆▇▇██████
eval/Recall,▁██████▁██▁
eval/loss,█▆▅▃▃▂▂▁▁▁▁
eval/mcc_metric,▁▅▇▇▇██▇██▇
eval/runtime,▄▇█▇█▅▂▄▁▁▁
eval/samples_per_second,▅▂▁▂▁▄▆▅███
eval/steps_per_second,▅▂▁▂▁▄▆▅███

0,1
eval/AUC-ROC,0.99822
eval/Accuracy,0.98601
eval/F1-score,0.91667
eval/Precision,1.0
eval/Recall,0.84615
eval/loss,0.14207
eval/mcc_metric,0.91287
eval/runtime,0.4013
eval/samples_per_second,356.361
eval/steps_per_second,12.46


[34m[1mwandb[0m: Agent Starting Run: k121h7u1 with config:
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	lora_alpha: 32
[34m[1mwandb[0m: 	lr: 1.2561499111615004e-05
[34m[1mwandb[0m: 	r: 4


Some weights of MolformerForSequenceClassification were not initialized from the model checkpoint at ibm/MoLFormer-XL-both-10pct and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.dense2.bias', 'classifier.dense2.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 1,640,458 || all params: 47,213,588 || trainable%: 3.4745


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss,Mcc Metric,Accuracy,Auc-roc,Precision,Recall,F1-score
1,No log,0.581817,0.498914,0.832168,0.899408,0.342857,0.923077,0.5
2,No log,0.410739,0.785757,0.958042,0.974556,0.705882,0.923077,0.8
3,0.525100,0.316396,0.81376,0.965035,0.987574,0.75,0.923077,0.827586
4,0.525100,0.223903,0.844375,0.972028,0.994675,0.8,0.923077,0.857143
5,0.525100,0.198002,0.830769,0.972028,0.991716,0.846154,0.846154,0.846154
6,0.224900,0.188338,0.912871,0.986014,0.993491,1.0,0.846154,0.916667
7,0.224900,0.178192,0.869361,0.979021,0.992308,0.916667,0.846154,0.88
8,0.131100,0.179138,0.912871,0.986014,0.99645,1.0,0.846154,0.916667
9,0.131100,0.186915,0.912871,0.986014,0.993491,1.0,0.846154,0.916667
10,0.131100,0.209079,0.912871,0.986014,0.993491,1.0,0.846154,0.916667


Model saved to ./models_molformer_clintox/k121h7u1


0,1
eval/AUC-ROC,▁▆▇████████
eval/Accuracy,▁▇▇▇▇██████
eval/F1-score,▁▆▇▇▇█▇████
eval/Precision,▁▅▅▆▆█▇████
eval/Recall,████▁▁▁▁▁▁▁
eval/loss,█▅▃▂▁▁▁▁▁▂▂
eval/mcc_metric,▁▆▆▇▇█▇████
eval/runtime,▄▄▆▇▇▆█▄▄▁▁
eval/samples_per_second,▅▅▃▂▂▃▁▅▅██
eval/steps_per_second,▅▅▃▂▂▃▁▅▅██

0,1
eval/AUC-ROC,0.99349
eval/Accuracy,0.98601
eval/F1-score,0.91667
eval/Precision,1.0
eval/Recall,0.84615
eval/loss,0.25012
eval/mcc_metric,0.91287
eval/runtime,0.3964
eval/samples_per_second,360.786
eval/steps_per_second,12.615


[34m[1mwandb[0m: Agent Starting Run: btpw4450 with config:
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	lora_alpha: 128
[34m[1mwandb[0m: 	lr: 1.823268967202021e-05
[34m[1mwandb[0m: 	r: 16


Some weights of MolformerForSequenceClassification were not initialized from the model checkpoint at ibm/MoLFormer-XL-both-10pct and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.dense2.bias', 'classifier.dense2.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 3,013,666 || all params: 48,632,900 || trainable%: 6.1968


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss,Mcc Metric,Accuracy,Auc-roc,Precision,Recall,F1-score
1,No log,0.414801,0.796205,0.965035,0.978698,0.785714,0.846154,0.814815
2,No log,0.191054,0.915385,0.986014,0.989941,0.923077,0.923077,0.923077
3,0.353100,0.179716,0.912871,0.986014,0.992899,1.0,0.846154,0.916667
4,0.353100,0.155668,0.915385,0.986014,0.992899,0.923077,0.923077,0.923077
5,0.353100,0.442334,0.912871,0.986014,0.982249,1.0,0.846154,0.916667
6,0.098200,0.330261,0.912871,0.986014,0.991124,1.0,0.846154,0.916667
7,0.098200,0.374909,0.869361,0.979021,0.988757,0.916667,0.846154,0.88


Model saved to ./models_molformer_clintox/btpw4450


0,1
eval/AUC-ROC,▁▇██▃▇▆
eval/Accuracy,▁█████▆
eval/F1-score,▁█████▅
eval/Precision,▁▅█▅██▅
eval/Recall,▁█▁█▁▁▁
eval/loss,▇▂▂▁█▅▆
eval/mcc_metric,▁█████▅
eval/runtime,▆▅▇█▁▄▅
eval/samples_per_second,▃▄▂▁█▅▄
eval/steps_per_second,▃▄▂▁█▅▄

0,1
eval/AUC-ROC,0.98876
eval/Accuracy,0.97902
eval/F1-score,0.88
eval/Precision,0.91667
eval/Recall,0.84615
eval/loss,0.37491
eval/mcc_metric,0.86936
eval/runtime,0.3966
eval/samples_per_second,360.556
eval/steps_per_second,12.607


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: g66590oz with config:
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	lora_alpha: 32
[34m[1mwandb[0m: 	lr: 1.361425656452323e-05
[34m[1mwandb[0m: 	r: 32


Some weights of MolformerForSequenceClassification were not initialized from the model checkpoint at ibm/MoLFormer-XL-both-10pct and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.dense2.bias', 'classifier.dense2.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 4,844,610 || all params: 50,525,316 || trainable%: 9.5885


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss,Mcc Metric,Accuracy,Auc-roc,Precision,Recall,F1-score
1,No log,0.492128,0.558934,0.888112,0.946154,0.44,0.846154,0.578947
2,No log,0.355241,0.844375,0.972028,0.985799,0.8,0.923077,0.857143
3,0.467000,0.253257,0.915385,0.986014,0.989941,0.923077,0.923077,0.923077
4,0.467000,0.169533,0.915385,0.986014,0.995858,0.923077,0.923077,0.923077
5,0.467000,0.163836,0.912871,0.986014,0.994083,1.0,0.846154,0.916667
6,0.185500,0.170438,0.912871,0.986014,0.995266,1.0,0.846154,0.916667
7,0.185500,0.139319,0.912871,0.986014,0.99645,1.0,0.846154,0.916667
8,0.125500,0.155448,0.912871,0.986014,0.997041,1.0,0.846154,0.916667


Model saved to ./models_molformer_clintox/g66590oz


0,1
eval/AUC-ROC,▁▆▇█████
eval/Accuracy,▁▇██████
eval/F1-score,▁▇██████
eval/Precision,▁▆▇▇████
eval/Recall,▁███▁▁▁▁
eval/loss,█▅▃▂▁▂▁▁
eval/mcc_metric,▁▇██████
eval/runtime,▂▁▅▂▅▂▂█
eval/samples_per_second,▇█▄█▄▇▇▁
eval/steps_per_second,▇█▄█▄▇▇▁

0,1
eval/AUC-ROC,0.99704
eval/Accuracy,0.98601
eval/F1-score,0.91667
eval/Precision,1.0
eval/Recall,0.84615
eval/loss,0.15545
eval/mcc_metric,0.91287
eval/runtime,0.4007
eval/samples_per_second,356.918
eval/steps_per_second,12.48


{'_runtime': 73.726774537, '_step': 11, '_timestamp': 1744265830.85145, '_wandb': {'runtime': 74}, 'eval/AUC-ROC': 0.9970414201183432, 'eval/Accuracy': 0.986013986013986, 'eval/F1-score': 0.9166666666666666, 'eval/Precision': 1, 'eval/Recall': 0.8461538461538461, 'eval/loss': 0.15544822812080383, 'eval/mcc_metric': 0.9128709291752768, 'eval/runtime': 0.4007, 'eval/samples_per_second': 356.918, 'eval/steps_per_second': 12.48, 'total_flos': 405128764576800, 'train/epoch': 8, 'train/global_step': 304, 'train/grad_norm': 0.46080031991004944, 'train/learning_rate': 8.24020792063248e-06, 'train/loss': 0.1255, 'train_loss': 0.2561702096197558, 'train_runtime': 65.9972, 'train_samples_per_second': 359.106, 'train_steps_per_second': 11.516}


Evaluate on Test Dataset

In [18]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer

base_model = AutoModelForSequenceClassification.from_pretrained(
    "ibm/MoLFormer-XL-both-10pct",
    num_labels=2,
    problem_type="single_label_classification",    
    trust_remote_code=True,
    deterministic_eval=True
)

tokenizer_clin = AutoTokenizer.from_pretrained(
    "ibm/MoLFormer-XL-both-10pct",
    trust_remote_code=True
)

Some weights of MolformerForSequenceClassification were not initialized from the model checkpoint at ibm/MoLFormer-XL-both-10pct and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.dense2.bias', 'classifier.dense2.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [41]:
adapter_model = PeftModel.from_pretrained(base_model, '/home/raghvendra2/Molformer_Finetuning/models/q3mia2tx')



### Comments on trained model

- bad performance- best_clintox_w__model

- best performance: best_clintox_model and checkpoint-150

- with 0.2 didnt replicate the same model, lets check for 0.3: 

In [19]:
import pandas as pd

test_data_clin=pd.read_csv('/home/raghvendra2/Molformer_Finetuning/clintox_test.csv')

In [20]:
from datasets import Dataset

smiles_test_clin = test_data_clin['smiles'].tolist()

test_tokenized_clin =tokenizer_clin(smiles_test_clin)

test_dataset_clin = Dataset.from_dict(test_tokenized_clin)

In [21]:
test_labels_clin = test_data_clin['CT_TOX'].tolist() 


test_dataset_clin = test_dataset_clin.add_column("labels", test_labels_clin)

In [22]:
from evaluate import load
import numpy as np
from scipy.special import softmax
from sklearn.metrics import roc_auc_score, precision_score, recall_score, f1_score, matthews_corrcoef

accuracy_metric = load("accuracy")

def compute_metrics(eval_pred):
        logits, labels = eval_pred
        probabilities = softmax(logits, axis=1)[:, 1]  # Get probabilities for class 1
        predictions = np.argmax(logits, axis=1)  # Choose the most likely class
        mcc = matthews_corrcoef(labels, predictions)
        
        
        return {
            "eval_mcc_metric": mcc,
            "Accuracy": accuracy_metric.compute(predictions=predictions, references=labels)["accuracy"],
            "AUC-ROC": roc_auc_score(labels, probabilities),  # AUC-ROC requires probabilities
            "Precision": precision_score(labels, predictions),
            "Recall": recall_score(labels, predictions),
            "F1-score": f1_score(labels, predictions)
        }

In [23]:
from transformers import Trainer, TrainingArguments

eval_args = TrainingArguments(
    output_dir="./test_results_clintox_wandb",
    per_device_eval_batch_size=16,
    report_to="none",  # Disable logging to W&B for test
    seed=42,  # Ensures reproducibility

)

tokenizer = AutoTokenizer.from_pretrained(
"ibm/MoLFormer-XL-both-10pct",
trust_remote_code=True
)

In [24]:
trainer = WeightedLossTrainer(
        model=adapter_model,
        args=eval_args,
        eval_dataset=test_dataset_clin,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics
    )

NameError: name 'adapter_model' is not defined

In [43]:
test_results_clin = trainer.evaluate()

print("Test Results for model2:", test_results_clin)

Test Results for model2: {'eval_mcc_metric': 0.9128709291752768, 'eval_loss': 0.23767508566379547, 'eval_model_preparation_time': 0.0144, 'eval_Accuracy': 0.986013986013986, 'eval_AUC-ROC': 0.993491124260355, 'eval_Precision': 1.0, 'eval_Recall': 0.8461538461538461, 'eval_F1-score': 0.9166666666666666, 'eval_runtime': 0.3836, 'eval_samples_per_second': 372.794, 'eval_steps_per_second': 23.463}


In [30]:
# List all checkpoints inside models directory
import os
from peft import PeftModel

models_dir = "./models_molformer_clintox"

def find_all_checkpoints(base_dir):
    all_checkpoints = []
    for folder in os.listdir(base_dir):
        folder_path = os.path.join(base_dir, folder)
        if os.path.isdir(folder_path):
            for subfolder in os.listdir(folder_path):
                subfolder_path = os.path.join(folder_path, subfolder)
                if os.path.isdir(subfolder_path) and subfolder.startswith("checkpoint-"):
                    if os.path.exists(os.path.join(subfolder_path, "adapter_config.json")):
                        all_checkpoints.append(subfolder_path)
    return all_checkpoints

valid_checkpoints = find_all_checkpoints(models_dir)
print("🧠 Valid nested checkpoints found:", valid_checkpoints)

for checkpoint_path in valid_checkpoints:
    checkpoint_name = os.path.basename(checkpoint_path)
    parent_folder = os.path.basename(os.path.dirname(checkpoint_path))

    print(f"\n🔍 Evaluating model: {parent_folder}/{checkpoint_name}")

    adapter_model = PeftModel.from_pretrained(base_model, checkpoint_path)
    adapter_model.eval()

    trainer = WeightedLossTrainer(
        model=adapter_model,
        args=eval_args,
        eval_dataset=test_dataset_clin,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics
    )


    test_results = trainer.evaluate()
    auc_score = test_results["eval_AUC-ROC"]
    if auc_score > 0.997:
        print(f"✅ AUC_ROC > 0.99 for {parent_folder}/{checkpoint_name}")
        print(f"📌 Test Results: {test_results}")
    else:
        print(f"❌ Skipping {parent_folder}/{checkpoint_name}")





🧠 Valid nested checkpoints found: ['./models_molformer_clintox/kbo8a99q/checkpoint-266', './models_molformer_clintox/kbo8a99q/checkpoint-304', './models_molformer_clintox/kbo8a99q/checkpoint-114', './models_molformer_clintox/kbo8a99q/checkpoint-228', './models_molformer_clintox/kbo8a99q/checkpoint-190', './models_molformer_clintox/gulowmgl/checkpoint-304', './models_molformer_clintox/gulowmgl/checkpoint-228', './models_molformer_clintox/gulowmgl/checkpoint-342', './models_molformer_clintox/gulowmgl/checkpoint-380', './models_molformer_clintox/gulowmgl/checkpoint-418', './models_molformer_clintox/jvf2j5zv/checkpoint-76', './models_molformer_clintox/jvf2j5zv/checkpoint-266', './models_molformer_clintox/jvf2j5zv/checkpoint-228', './models_molformer_clintox/jvf2j5zv/checkpoint-152', './models_molformer_clintox/jvf2j5zv/checkpoint-190', './models_molformer_clintox/g66590oz/checkpoint-266', './models_molformer_clintox/g66590oz/checkpoint-304', './models_molformer_clintox/g66590oz/checkpoint-

  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping kbo8a99q/checkpoint-266

🔍 Evaluating model: kbo8a99q/checkpoint-304


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping kbo8a99q/checkpoint-304

🔍 Evaluating model: kbo8a99q/checkpoint-114


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping kbo8a99q/checkpoint-114

🔍 Evaluating model: kbo8a99q/checkpoint-228


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping kbo8a99q/checkpoint-228

🔍 Evaluating model: kbo8a99q/checkpoint-190


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping kbo8a99q/checkpoint-190

🔍 Evaluating model: gulowmgl/checkpoint-304


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


✅ AUC_ROC > 0.99 for gulowmgl/checkpoint-304
📌 Test Results: {'eval_mcc_metric': 0.9570948408340445, 'eval_loss': 0.15103395283222198, 'eval_model_preparation_time': 0.013, 'eval_Accuracy': 0.993006993006993, 'eval_AUC-ROC': 0.9976331360946745, 'eval_Precision': 1.0, 'eval_Recall': 0.9230769230769231, 'eval_F1-score': 0.96, 'eval_runtime': 0.38, 'eval_samples_per_second': 376.282, 'eval_steps_per_second': 23.682}

🔍 Evaluating model: gulowmgl/checkpoint-228


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


✅ AUC_ROC > 0.99 for gulowmgl/checkpoint-228
📌 Test Results: {'eval_mcc_metric': 0.9570948408340445, 'eval_loss': 0.18789832293987274, 'eval_model_preparation_time': 0.0132, 'eval_Accuracy': 0.993006993006993, 'eval_AUC-ROC': 0.9976331360946745, 'eval_Precision': 1.0, 'eval_Recall': 0.9230769230769231, 'eval_F1-score': 0.96, 'eval_runtime': 0.3793, 'eval_samples_per_second': 376.961, 'eval_steps_per_second': 23.725}

🔍 Evaluating model: gulowmgl/checkpoint-342


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


✅ AUC_ROC > 0.99 for gulowmgl/checkpoint-342
📌 Test Results: {'eval_mcc_metric': 0.9570948408340445, 'eval_loss': 0.14021161198616028, 'eval_model_preparation_time': 0.0128, 'eval_Accuracy': 0.993006993006993, 'eval_AUC-ROC': 0.9982248520710059, 'eval_Precision': 1.0, 'eval_Recall': 0.9230769230769231, 'eval_F1-score': 0.96, 'eval_runtime': 0.3805, 'eval_samples_per_second': 375.846, 'eval_steps_per_second': 23.655}

🔍 Evaluating model: gulowmgl/checkpoint-380


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


✅ AUC_ROC > 0.99 for gulowmgl/checkpoint-380
📌 Test Results: {'eval_mcc_metric': 0.9570948408340445, 'eval_loss': 0.12716948986053467, 'eval_model_preparation_time': 0.0129, 'eval_Accuracy': 0.993006993006993, 'eval_AUC-ROC': 0.9976331360946745, 'eval_Precision': 1.0, 'eval_Recall': 0.9230769230769231, 'eval_F1-score': 0.96, 'eval_runtime': 0.381, 'eval_samples_per_second': 375.313, 'eval_steps_per_second': 23.621}

🔍 Evaluating model: gulowmgl/checkpoint-418


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


✅ AUC_ROC > 0.99 for gulowmgl/checkpoint-418
📌 Test Results: {'eval_mcc_metric': 0.9128709291752768, 'eval_loss': 0.12895627319812775, 'eval_model_preparation_time': 0.0131, 'eval_Accuracy': 0.986013986013986, 'eval_AUC-ROC': 0.9976331360946745, 'eval_Precision': 1.0, 'eval_Recall': 0.8461538461538461, 'eval_F1-score': 0.9166666666666666, 'eval_runtime': 0.383, 'eval_samples_per_second': 373.345, 'eval_steps_per_second': 23.497}

🔍 Evaluating model: jvf2j5zv/checkpoint-76


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping jvf2j5zv/checkpoint-76

🔍 Evaluating model: jvf2j5zv/checkpoint-266


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


✅ AUC_ROC > 0.99 for jvf2j5zv/checkpoint-266
📌 Test Results: {'eval_mcc_metric': 0.9128709291752768, 'eval_loss': 0.13314110040664673, 'eval_model_preparation_time': 0.013, 'eval_Accuracy': 0.986013986013986, 'eval_AUC-ROC': 0.9988165680473373, 'eval_Precision': 1.0, 'eval_Recall': 0.8461538461538461, 'eval_F1-score': 0.9166666666666666, 'eval_runtime': 0.3788, 'eval_samples_per_second': 377.483, 'eval_steps_per_second': 23.758}

🔍 Evaluating model: jvf2j5zv/checkpoint-228


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


✅ AUC_ROC > 0.99 for jvf2j5zv/checkpoint-228
📌 Test Results: {'eval_mcc_metric': 0.9128709291752768, 'eval_loss': 0.14308039844036102, 'eval_model_preparation_time': 0.0129, 'eval_Accuracy': 0.986013986013986, 'eval_AUC-ROC': 0.9988165680473373, 'eval_Precision': 1.0, 'eval_Recall': 0.8461538461538461, 'eval_F1-score': 0.9166666666666666, 'eval_runtime': 0.3783, 'eval_samples_per_second': 377.995, 'eval_steps_per_second': 23.79}

🔍 Evaluating model: jvf2j5zv/checkpoint-152


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


✅ AUC_ROC > 0.99 for jvf2j5zv/checkpoint-152
📌 Test Results: {'eval_mcc_metric': 0.9128709291752768, 'eval_loss': 0.17804013192653656, 'eval_model_preparation_time': 0.0129, 'eval_Accuracy': 0.986013986013986, 'eval_AUC-ROC': 0.998224852071006, 'eval_Precision': 1.0, 'eval_Recall': 0.8461538461538461, 'eval_F1-score': 0.9166666666666666, 'eval_runtime': 0.38, 'eval_samples_per_second': 376.322, 'eval_steps_per_second': 23.685}

🔍 Evaluating model: jvf2j5zv/checkpoint-190


✅ AUC_ROC > 0.99 for jvf2j5zv/checkpoint-190
📌 Test Results: {'eval_mcc_metric': 0.9128709291752768, 'eval_loss': 0.14329198002815247, 'eval_model_preparation_time': 0.0129, 'eval_Accuracy': 0.986013986013986, 'eval_AUC-ROC': 0.9988165680473373, 'eval_Precision': 1.0, 'eval_Recall': 0.8461538461538461, 'eval_F1-score': 0.9166666666666666, 'eval_runtime': 0.3787, 'eval_samples_per_second': 377.588, 'eval_steps_per_second': 23.764}

🔍 Evaluating model: g66590oz/checkpoint-266


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping g66590oz/checkpoint-266

🔍 Evaluating model: g66590oz/checkpoint-304


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping g66590oz/checkpoint-304

🔍 Evaluating model: g66590oz/checkpoint-114


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping g66590oz/checkpoint-114

🔍 Evaluating model: g66590oz/checkpoint-228


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping g66590oz/checkpoint-228

🔍 Evaluating model: g66590oz/checkpoint-190


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping g66590oz/checkpoint-190

🔍 Evaluating model: o1vbzyf5/checkpoint-76


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping o1vbzyf5/checkpoint-76

🔍 Evaluating model: o1vbzyf5/checkpoint-266


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping o1vbzyf5/checkpoint-266

🔍 Evaluating model: o1vbzyf5/checkpoint-228


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping o1vbzyf5/checkpoint-228

🔍 Evaluating model: o1vbzyf5/checkpoint-152


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping o1vbzyf5/checkpoint-152

🔍 Evaluating model: o1vbzyf5/checkpoint-190


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping o1vbzyf5/checkpoint-190

🔍 Evaluating model: 3wzclq6q/checkpoint-266


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping 3wzclq6q/checkpoint-266

🔍 Evaluating model: 3wzclq6q/checkpoint-304


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping 3wzclq6q/checkpoint-304

🔍 Evaluating model: 3wzclq6q/checkpoint-114


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping 3wzclq6q/checkpoint-114

🔍 Evaluating model: 3wzclq6q/checkpoint-228


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping 3wzclq6q/checkpoint-228

🔍 Evaluating model: 3wzclq6q/checkpoint-190


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping 3wzclq6q/checkpoint-190

🔍 Evaluating model: lm8vser5/checkpoint-76


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping lm8vser5/checkpoint-76

🔍 Evaluating model: lm8vser5/checkpoint-266


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


✅ AUC_ROC > 0.99 for lm8vser5/checkpoint-266
📌 Test Results: {'eval_mcc_metric': 0.9128709291752768, 'eval_loss': 0.16216017305850983, 'eval_model_preparation_time': 0.0135, 'eval_Accuracy': 0.986013986013986, 'eval_AUC-ROC': 0.9988165680473373, 'eval_Precision': 1.0, 'eval_Recall': 0.8461538461538461, 'eval_F1-score': 0.9166666666666666, 'eval_runtime': 0.3793, 'eval_samples_per_second': 377.045, 'eval_steps_per_second': 23.73}

🔍 Evaluating model: lm8vser5/checkpoint-228


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


✅ AUC_ROC > 0.99 for lm8vser5/checkpoint-228
📌 Test Results: {'eval_mcc_metric': 0.9128709291752768, 'eval_loss': 0.18674971163272858, 'eval_model_preparation_time': 0.0129, 'eval_Accuracy': 0.986013986013986, 'eval_AUC-ROC': 0.998224852071006, 'eval_Precision': 1.0, 'eval_Recall': 0.8461538461538461, 'eval_F1-score': 0.9166666666666666, 'eval_runtime': 0.3793, 'eval_samples_per_second': 376.996, 'eval_steps_per_second': 23.727}

🔍 Evaluating model: lm8vser5/checkpoint-152


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


✅ AUC_ROC > 0.99 for lm8vser5/checkpoint-152
📌 Test Results: {'eval_mcc_metric': 0.9128709291752768, 'eval_loss': 0.26440030336380005, 'eval_model_preparation_time': 0.013, 'eval_Accuracy': 0.986013986013986, 'eval_AUC-ROC': 0.9976331360946745, 'eval_Precision': 1.0, 'eval_Recall': 0.8461538461538461, 'eval_F1-score': 0.9166666666666666, 'eval_runtime': 0.3794, 'eval_samples_per_second': 376.943, 'eval_steps_per_second': 23.724}

🔍 Evaluating model: lm8vser5/checkpoint-190


✅ AUC_ROC > 0.99 for lm8vser5/checkpoint-190
📌 Test Results: {'eval_mcc_metric': 0.9128709291752768, 'eval_loss': 0.21533715724945068, 'eval_model_preparation_time': 0.0128, 'eval_Accuracy': 0.986013986013986, 'eval_AUC-ROC': 0.9976331360946746, 'eval_Precision': 1.0, 'eval_Recall': 0.8461538461538461, 'eval_F1-score': 0.9166666666666666, 'eval_runtime': 0.3805, 'eval_samples_per_second': 375.772, 'eval_steps_per_second': 23.65}

🔍 Evaluating model: btpw4450/checkpoint-76


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping btpw4450/checkpoint-76

🔍 Evaluating model: btpw4450/checkpoint-266


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping btpw4450/checkpoint-266

🔍 Evaluating model: btpw4450/checkpoint-228


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping btpw4450/checkpoint-228

🔍 Evaluating model: btpw4450/checkpoint-152


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping btpw4450/checkpoint-152

🔍 Evaluating model: btpw4450/checkpoint-190


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping btpw4450/checkpoint-190

🔍 Evaluating model: k121h7u1/checkpoint-304


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping k121h7u1/checkpoint-304

🔍 Evaluating model: k121h7u1/checkpoint-228


❌ Skipping k121h7u1/checkpoint-228

🔍 Evaluating model: k121h7u1/checkpoint-342


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping k121h7u1/checkpoint-342

🔍 Evaluating model: k121h7u1/checkpoint-380


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping k121h7u1/checkpoint-380

🔍 Evaluating model: k121h7u1/checkpoint-418


❌ Skipping k121h7u1/checkpoint-418

🔍 Evaluating model: 2diwcmfr/checkpoint-266


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


✅ AUC_ROC > 0.99 for 2diwcmfr/checkpoint-266
📌 Test Results: {'eval_mcc_metric': 0.9570948408340445, 'eval_loss': 0.13525691628456116, 'eval_model_preparation_time': 0.0129, 'eval_Accuracy': 0.993006993006993, 'eval_AUC-ROC': 0.9982248520710059, 'eval_Precision': 1.0, 'eval_Recall': 0.9230769230769231, 'eval_F1-score': 0.96, 'eval_runtime': 0.3913, 'eval_samples_per_second': 365.484, 'eval_steps_per_second': 23.002}

🔍 Evaluating model: 2diwcmfr/checkpoint-456


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


✅ AUC_ROC > 0.99 for 2diwcmfr/checkpoint-456
📌 Test Results: {'eval_mcc_metric': 0.9128709291752768, 'eval_loss': 0.12335965037345886, 'eval_model_preparation_time': 0.0131, 'eval_Accuracy': 0.986013986013986, 'eval_AUC-ROC': 0.9982248520710059, 'eval_Precision': 1.0, 'eval_Recall': 0.8461538461538461, 'eval_F1-score': 0.9166666666666666, 'eval_runtime': 0.3928, 'eval_samples_per_second': 364.084, 'eval_steps_per_second': 22.914}

🔍 Evaluating model: 2diwcmfr/checkpoint-342


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


✅ AUC_ROC > 0.99 for 2diwcmfr/checkpoint-342
📌 Test Results: {'eval_mcc_metric': 0.9128709291752768, 'eval_loss': 0.1257503479719162, 'eval_model_preparation_time': 0.013, 'eval_Accuracy': 0.986013986013986, 'eval_AUC-ROC': 0.9982248520710059, 'eval_Precision': 1.0, 'eval_Recall': 0.8461538461538461, 'eval_F1-score': 0.9166666666666666, 'eval_runtime': 0.3912, 'eval_samples_per_second': 365.547, 'eval_steps_per_second': 23.006}

🔍 Evaluating model: 2diwcmfr/checkpoint-380


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


✅ AUC_ROC > 0.99 for 2diwcmfr/checkpoint-380
📌 Test Results: {'eval_mcc_metric': 0.9570948408340445, 'eval_loss': 0.11523086577653885, 'eval_model_preparation_time': 0.0136, 'eval_Accuracy': 0.993006993006993, 'eval_AUC-ROC': 0.9976331360946745, 'eval_Precision': 1.0, 'eval_Recall': 0.9230769230769231, 'eval_F1-score': 0.96, 'eval_runtime': 0.3919, 'eval_samples_per_second': 364.857, 'eval_steps_per_second': 22.963}

🔍 Evaluating model: 2diwcmfr/checkpoint-418


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


✅ AUC_ROC > 0.99 for 2diwcmfr/checkpoint-418
📌 Test Results: {'eval_mcc_metric': 0.9128709291752768, 'eval_loss': 0.1249915137887001, 'eval_model_preparation_time': 0.0132, 'eval_Accuracy': 0.986013986013986, 'eval_AUC-ROC': 0.9982248520710059, 'eval_Precision': 1.0, 'eval_Recall': 0.8461538461538461, 'eval_F1-score': 0.9166666666666666, 'eval_runtime': 0.3942, 'eval_samples_per_second': 362.729, 'eval_steps_per_second': 22.829}


### The best model: lm8vser5/checkpoint-266

trainable params: 1,640,458 || all params: 47,213,588 || trainable%: 3.4745

📌 Test Results: {'eval_mcc_metric': 0.9128709291752768, 'eval_loss': 0.16216017305850983, 'eval_model_preparation_time': 0.0135, 'eval_Accuracy': 0.986013986013986, 'eval_AUC-ROC': 0.9988165680473373, 'eval_Precision': 1.0, 'eval_Recall': 0.8461538461538461, 'eval_F1-score': 0.9166666666666666, 'eval_runtime': 0.3793, 'eval_samples_per_second': 377.045, 'eval_steps_per_second': 23.73}



## Load and Merge the best model

In [31]:
base_model = AutoModelForSequenceClassification.from_pretrained(
    'ibm/MoLFormer-XL-both-10pct',
    num_labels=2,
    problem_type="single_label_classification",    
    trust_remote_code=True,
    
)

Some weights of MolformerForSequenceClassification were not initialized from the model checkpoint at ibm/MoLFormer-XL-both-10pct and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.dense2.bias', 'classifier.dense2.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [32]:
from peft import PeftModel  

adapter_model = PeftModel.from_pretrained(base_model, "/home/raghvendra2/Molformer_Finetuning/models_molformer_clintox/lm8vser5/checkpoint-266")



In [33]:
final_model_clintox_molformer= adapter_model.merge_and_unload()

### Save model to Chemberta finetuned model lora 100M MTR

In [34]:
save_path = "/home/raghvendra2/Molformer_Finetuning/Clintox_Final_Molformer_model"

final_model_clintox_molformer.save_pretrained(save_path)

