## Finetuning Chemberata Model for Clintox

In [1]:
#Importing Libraries

import evaluate
from scipy.special import softmax
from sklearn.metrics import roc_auc_score, precision_score, recall_score, f1_score,matthews_corrcoef
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from datasets import Dataset
from datasets import load_dataset
import datasets
from peft import LoraConfig, get_peft_model
import torch
import torch.nn.functional as F
from sklearn.preprocessing import LabelEncoder
from transformers import Trainer
from transformers import TrainingArguments, EarlyStoppingCallback

import pandas as pd
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm
2025-04-10 10:26:45.568083: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-04-10 10:26:45.588560: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1744273605.614111 1883300 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1744273605.621907 1883300 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1744273605.641323 1883300 computation_placer.cc:177] computation placer already r

## Loading Tokenizer and model


### Chemberta 77M MLM Model

In [2]:
### Chemberta 77M MTR Model

from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Load the tokenizer
tokenizer_clin = AutoTokenizer.from_pretrained(
    "DeepChem/ChemBERTa-5M-MTR",
    trust_remote_code=True
)

# Load the model with a classification head
model_clin = AutoModelForSequenceClassification.from_pretrained(
    "DeepChem/ChemBERTa-5M-MTR",
    num_labels=2,
    problem_type="single_label_classification",    
    trust_remote_code=True
)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-5M-MTR and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


## Loading Dataset

In [3]:
import pandas as pd

train_clin=pd.read_csv('/home/raghvendra2/Molformer_Finetuning/clintox_train.csv')
val_clin=pd.read_csv('/home/raghvendra2/Molformer_Finetuning/clintox_valid.csv')

In [4]:
smiles_list_clin = train_clin['smiles'].tolist()
smiles_val_clin=val_clin['smiles'].tolist()
train_tokenized_clin=tokenizer_clin(smiles_list_clin)
val_tokenized_clin=tokenizer_clin(smiles_val_clin)


In [5]:
from datasets import Dataset
train_dataset_clin = Dataset.from_dict(train_tokenized_clin)
val_dataset_clin = Dataset.from_dict(val_tokenized_clin)


In [6]:
train_labels_clin = train_clin['CT_TOX'].tolist() # Assuming tasks start from column 1
val_labels_clin = val_clin['CT_TOX'].tolist()

In [7]:
train_dataset_clin = train_dataset_clin.add_column("labels", train_labels_clin)
val_dataset_clin = val_dataset_clin.add_column("labels", val_labels_clin)

In [8]:
def data_load():
    train_clin=pd.read_csv('/home/raghvendra2/Molformer_Finetuning/clintox_train.csv')
    val_clin=pd.read_csv('/home/raghvendra2/Molformer_Finetuning/clintox_test.csv')

    return train_clin, val_clin

In [9]:
def data_prep(data_process,tokenizer_clin):

    smiles_list_clin = data_process['smiles'].tolist()
    tokenized_clin=tokenizer_clin(smiles_list_clin)
    
    
    dataset_clin = Dataset.from_dict(tokenized_clin)
    

    labels_clin = data_process['CT_TOX'].tolist() # Assuming tasks start from column 1
    
    dataset_clin = dataset_clin.add_column("labels", labels_clin)
    

    return dataset_clin


In [10]:
from peft import LoraConfig, get_peft_model, PeftModel

def lora_config(r, lora_alpha, dropout):

    lora_config = LoraConfig(
        task_type="SEQ_CLS",  # Sequence classification task
        r=r,  # Rank of LoRA matrices
        lora_alpha=lora_alpha,  # Scaling factor double of rank( from the rule of thumb)
        target_modules='all-linear',
        lora_dropout=dropout  # Dropout rate
        #init_lora_weights="gaussian"
    )

    return lora_config


In [11]:
import torch


class_weights= [1-(train_dataset_clin['labels'].count(0)/len(train_dataset_clin['labels'])),
                           1-(train_dataset_clin['labels'].count(1)/len(train_dataset_clin['labels']))]

class_weights = torch.from_numpy(np.array(class_weights)).float().to("cuda")

class WeightedLossTrainer(Trainer):

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):

        outputs = model(**inputs)
        logits = outputs.get("logits")

        # Extract labels
        labels = inputs.get("labels")

        # compute custom loss (suppose one has 2 labels with different weights)
        loss_func = torch.nn.CrossEntropyLoss(weight=class_weights)

        # compute loss
        loss = loss_func(logits.view(-1, self.model.config.num_labels), labels.view(-1))
        return (loss, outputs) if return_outputs else loss

In [12]:
import wandb
import os

# initialize wandb with sweep 
def run_training():

    run = wandb.init(project="Clintox Hyperparameter Tuning")
    config = run.config
    

    tokenizer_clin = AutoTokenizer.from_pretrained(
    "DeepChem/ChemBERTa-5M-MTR",
    trust_remote_code=True
    )

    save_dir = f"./clintox_models_5MTR/{wandb.run.id}"  # Unique directory for each run
    os.makedirs(save_dir, exist_ok=True)

#Data

    train_data, val_data=data_load()
    training_data=data_prep(train_data,tokenizer_clin)
    validation_data=data_prep(val_data,tokenizer_clin)    

    

# Load the model with a classification head
    model_clin = AutoModelForSequenceClassification.from_pretrained(
        "DeepChem/ChemBERTa-5M-MTR",
        num_labels=2,
        problem_type="single_label_classification",    
        trust_remote_code=True
    )

    peft_config = lora_config(config.r, config.lora_alpha, config.dropout)
    lora_model = get_peft_model(model_clin, peft_config)

    training_args = TrainingArguments(
    output_dir=save_dir,
    evaluation_strategy="epoch",
    learning_rate=config.lr,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    num_train_epochs=20,
    weight_decay=0.01,
    save_strategy="epoch",
    logging_dir="./logs_clin",
    logging_strategy="steps",
    logging_steps=100,
    report_to="wandb",
    save_total_limit=5,
    load_best_model_at_end=True,
    metric_for_best_model="eval_mcc_metric"
    )


    accuracy_metric = evaluate.load("accuracy")

    def compute_metrics(eval_pred):
        logits, labels = eval_pred
        probabilities = softmax(logits, axis=1)[:, 1]  # Get probabilities for class 1
        predictions = np.argmax(logits, axis=1)  # Choose the most likely class
        mcc = matthews_corrcoef(labels, predictions)

        return {
            "eval_mcc_metric": mcc,
            "Accuracy": accuracy_metric.compute(predictions=predictions, references=labels)["accuracy"],
            "AUC-ROC": roc_auc_score(labels, probabilities),  # AUC-ROC requires probabilities
            "Precision": precision_score(labels, predictions),
            "Recall": recall_score(labels, predictions),
            "F1-score": f1_score(labels, predictions)
        }


   

    trainer= WeightedLossTrainer(
    model=lora_model,
    args=training_args,
    train_dataset=training_data,
    eval_dataset=validation_data,
    tokenizer=tokenizer_clin,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=5)]
    )
    

    
    trainer.train()
    trainer.save_model(save_dir)
        
    print(f"Model saved to {save_dir}")

    wandb.finish()

   




In [13]:
 # Define the sweep configuration
def main():

    sweep_config = {
    "name": "Clintox Hyperparameter Tuning",
    "method": "bayes",
    "metric": {
        "goal": "maximize",
        "name": "eval/mcc_metric"
        },
    "parameters": {
        "lr": {
        "distribution": "uniform",
                "min": 1e-5,
                "max": 2e-5
        },
        "r": {
            "values": [8,16,32,64,128]
        },
        "lora_alpha": {
            "values": [16,32,64, 128]
        },
        "dropout": {
            "values": [0.1, 0.2]
        }
    }
    }
    sweep_id = wandb.sweep(sweep_config, project="huggingface")
    wandb.agent(sweep_id, function=run_training, count=10)

    api = wandb.Api()
    sweep = api.sweep(f"huggingface/{sweep_id}")
    print(sweep.runs[0].summary_metrics)

    runs_with_eval_loss = [run for run in sweep.runs if 'eval/mcc_metric' in run.summary_metrics]

    if runs_with_eval_loss:
        best_run = sorted(runs_with_eval_loss, key=lambda run: run.summary_metrics['eval/mcc_metric'],reverse=False)[0]
    else:
        raise ValueError("No runs found with 'eval/mcc_metric' metric.")

    best_hyperparameters = best_run.config
    print(best_hyperparameters)

if __name__ == "__main__":
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    main()

Create sweep with ID: sdvave8e
Sweep URL: https://wandb.ai/harodharsha21-iit-ropar/huggingface/sweeps/sdvave8e


[34m[1mwandb[0m: Agent Starting Run: mc7f44lm with config:
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	lora_alpha: 32
[34m[1mwandb[0m: 	lr: 1.5625314124316165e-05
[34m[1mwandb[0m: 	r: 8
[34m[1mwandb[0m: Currently logged in as: [33mharodharsha21[0m ([33mharodharsha21-iit-ropar[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-5M-MTR and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer= WeightedLossTrainer(


[2025-04-10 10:39:26,808] [INFO] [real_accelerator.py:239:get_accelerator] Setting ds_accelerator to cuda (auto detect)


/storage/qnap_home/raghvendra2/micromamba/envs/Molformer/compiler_compat/ld: cannot find -laio: No such file or directory
collect2: error: ld returned 1 exit status
/storage/qnap_home/raghvendra2/micromamba/envs/Molformer/compiler_compat/ld: cannot find -lcufile: No such file or directory
collect2: error: ld returned 1 exit status
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss,Mcc Metric,Accuracy,Auc-roc,Precision,Recall,F1-score
1,No log,0.667858,0.234196,0.643357,0.779882,0.172414,0.769231,0.28169
2,No log,0.6369,0.337409,0.762238,0.857396,0.243902,0.769231,0.37037
3,0.653800,0.606263,0.494244,0.853147,0.898817,0.366667,0.846154,0.511628
4,0.653800,0.573562,0.53115,0.874126,0.931361,0.407407,0.846154,0.55
5,0.653800,0.539563,0.544683,0.881119,0.953254,0.423077,0.846154,0.564103
6,0.570400,0.506029,0.573975,0.895105,0.963905,0.458333,0.846154,0.594595
7,0.570400,0.471814,0.589891,0.902098,0.968639,0.478261,0.846154,0.611111
8,0.479400,0.437992,0.656103,0.916084,0.973373,0.521739,0.923077,0.666667
9,0.479400,0.405729,0.656103,0.916084,0.975148,0.521739,0.923077,0.666667
10,0.479400,0.375672,0.656103,0.916084,0.97574,0.521739,0.923077,0.666667


Model saved to ./clintox_models_5MTR/mc7f44lm


0,1
eval/AUC-ROC,▁▄▅▆▇▇███████
eval/Accuracy,▁▄▆▇▇▇███████
eval/F1-score,▁▃▅▆▆▇▇██████
eval/Precision,▁▂▅▆▆▇▇██████
eval/Recall,▁▁▅▅▅▅▅██████
eval/loss,█▇▇▆▅▅▄▄▃▂▂▁▁
eval/mcc_metric,▁▃▅▆▆▇▇██████
eval/runtime,█▁▁▁▁▁▁▁▁▁▁▁▁
eval/samples_per_second,▁█████▇█▇████
eval/steps_per_second,▁█████▇█▇████

0,1
eval/AUC-ROC,0.97811
eval/Accuracy,0.91608
eval/F1-score,0.66667
eval/Precision,0.52174
eval/Recall,0.92308
eval/loss,0.30919
eval/mcc_metric,0.6561
eval/runtime,0.1166
eval/samples_per_second,1225.896
eval/steps_per_second,42.863


[34m[1mwandb[0m: Agent Starting Run: 8nooftvw with config:
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	lora_alpha: 16
[34m[1mwandb[0m: 	lr: 1.3801082904795e-05
[34m[1mwandb[0m: 	r: 8


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-5M-MTR and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer= WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss,Mcc Metric,Accuracy,Auc-roc,Precision,Recall,F1-score
1,No log,0.667631,0.29659,0.72028,0.810651,0.212766,0.769231,0.333333
2,No log,0.645106,0.40716,0.79021,0.881065,0.282051,0.846154,0.423077
3,0.660300,0.623923,0.451925,0.825175,0.917751,0.323529,0.846154,0.468085
4,0.660300,0.602924,0.494244,0.853147,0.936686,0.366667,0.846154,0.511628
5,0.660300,0.582488,0.494244,0.853147,0.946746,0.366667,0.846154,0.511628
6,0.603100,0.563357,0.505984,0.86014,0.952071,0.37931,0.846154,0.52381
7,0.603100,0.544299,0.53115,0.874126,0.955621,0.407407,0.846154,0.55
8,0.548500,0.525051,0.53115,0.874126,0.960355,0.407407,0.846154,0.55
9,0.548500,0.506505,0.544683,0.881119,0.963905,0.423077,0.846154,0.564103
10,0.548500,0.488868,0.544683,0.881119,0.964497,0.423077,0.846154,0.564103


Model saved to ./clintox_models_5MTR/8nooftvw


0,1
eval/AUC-ROC,▁▄▆▇▇▇▇██████████
eval/Accuracy,▁▄▅▇▇▇▇▇█████████
eval/F1-score,▁▄▅▆▆▆▇▇█████████
eval/Precision,▁▃▄▆▆▆▇▇▇▇▇██████
eval/Recall,▁████████████████
eval/loss,█▇▇▆▆▅▅▄▄▃▃▂▂▂▁▁▁
eval/mcc_metric,▁▄▅▆▆▇▇▇█████████
eval/runtime,▄█▃▃▃▄▄▃▃▄▃▃▁▃▄▂▁
eval/samples_per_second,▅▁▆▆▆▅▅▆▆▅▆▆█▆▅▇█
eval/steps_per_second,▅▁▆▆▆▅▅▆▆▅▆▆█▆▅▇█

0,1
eval/AUC-ROC,0.97101
eval/Accuracy,0.88811
eval/F1-score,0.57895
eval/Precision,0.44
eval/Recall,0.84615
eval/loss,0.40965
eval/mcc_metric,0.55893
eval/runtime,0.1159
eval/samples_per_second,1234.157
eval/steps_per_second,43.152


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: vthdn47v with config:
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	lora_alpha: 32
[34m[1mwandb[0m: 	lr: 1.3846754312549576e-05
[34m[1mwandb[0m: 	r: 16


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-5M-MTR and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer= WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss,Mcc Metric,Accuracy,Auc-roc,Precision,Recall,F1-score
1,No log,0.656298,0.29277,0.615385,0.891124,0.181818,0.923077,0.303797
2,No log,0.631917,0.380266,0.727273,0.940237,0.24,0.923077,0.380952
3,0.655800,0.607393,0.519615,0.846154,0.956213,0.363636,0.923077,0.521739
4,0.655800,0.581655,0.530595,0.853147,0.962722,0.375,0.923077,0.533333
5,0.655800,0.555064,0.553988,0.867133,0.972189,0.4,0.923077,0.55814
6,0.588500,0.528209,0.579569,0.881119,0.973964,0.428571,0.923077,0.585366
7,0.588500,0.50044,0.579569,0.881119,0.975148,0.428571,0.923077,0.585366
8,0.516600,0.471939,0.593306,0.888112,0.97574,0.444444,0.923077,0.6
9,0.516600,0.444017,0.622978,0.902098,0.976331,0.48,0.923077,0.631579
10,0.516600,0.41704,0.622978,0.902098,0.977515,0.48,0.923077,0.631579


Model saved to ./clintox_models_5MTR/vthdn47v


0,1
eval/AUC-ROC,▁▅▆▇▇▇▇█████████████
eval/Accuracy,▁▄▆▆▇▇▇▇████████████
eval/F1-score,▁▂▅▅▆▆▆▆▇▇▇█████████
eval/Precision,▁▂▅▅▅▆▆▆▇▇▇█████████
eval/Recall,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/loss,██▇▇▆▆▅▄▄▃▃▂▂▂▂▁▁▁▁▁
eval/mcc_metric,▁▃▅▅▆▆▆▇▇▇▇█████████
eval/runtime,▂▂▄▄▂▃▄▅▆▆▃▄▄▄█▃▁▂▂▁
eval/samples_per_second,▆▇▅▅▇▆▄▄▃▃▆▅▅▅▁▆█▇▇█
eval/steps_per_second,▆▇▅▅▇▆▄▄▃▃▆▅▅▅▁▆█▇▇█

0,1
eval/AUC-ROC,0.98166
eval/Accuracy,0.92308
eval/F1-score,0.68571
eval/Precision,0.54545
eval/Recall,0.92308
eval/loss,0.29693
eval/mcc_metric,0.6742
eval/runtime,0.1157
eval/samples_per_second,1235.812
eval/steps_per_second,43.21


[34m[1mwandb[0m: Agent Starting Run: wlgpdy7b with config:
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	lora_alpha: 128
[34m[1mwandb[0m: 	lr: 1.1180080096838924e-05
[34m[1mwandb[0m: 	r: 128


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-5M-MTR and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer= WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss,Mcc Metric,Accuracy,Auc-roc,Precision,Recall,F1-score
1,No log,0.661399,0.423972,0.832168,0.899408,0.322581,0.769231,0.454545
2,No log,0.632249,0.553988,0.867133,0.962722,0.4,0.923077,0.55814
3,0.654100,0.599104,0.607752,0.895105,0.974556,0.461538,0.923077,0.615385
4,0.654100,0.557657,0.622978,0.902098,0.976923,0.48,0.923077,0.631579
5,0.654100,0.50796,0.639064,0.909091,0.980473,0.5,0.923077,0.648649
6,0.560900,0.453885,0.639064,0.909091,0.979882,0.5,0.923077,0.648649
7,0.560900,0.395651,0.639064,0.909091,0.979882,0.5,0.923077,0.648649
8,0.422100,0.338696,0.639064,0.909091,0.980473,0.5,0.923077,0.648649
9,0.422100,0.288334,0.6742,0.923077,0.981657,0.545455,0.923077,0.685714
10,0.422100,0.246778,0.6742,0.923077,0.98284,0.545455,0.923077,0.685714


Model saved to ./clintox_models_5MTR/wlgpdy7b


0,1
eval/AUC-ROC,▁▆▇▇▇▇▇▇▇▇███████
eval/Accuracy,▁▃▅▆▆▆▆▆▇▇███████
eval/F1-score,▁▄▅▆▆▆▆▆▇▇▇██████
eval/Precision,▁▃▅▅▅▅▅▅▇▇▇██████
eval/Recall,▁████████████████
eval/loss,██▇▇▆▅▄▄▃▂▂▂▁▁▁▁▁
eval/mcc_metric,▁▄▅▆▆▆▆▆▇▇███████
eval/runtime,█▄▃▁▄▃▁▂▄▂▄▅▃▅▃▃▃
eval/samples_per_second,▁▅▆█▅▆█▇▅▇▅▄▆▄▅▆▆
eval/steps_per_second,▁▅▆█▅▆█▇▅▇▅▄▆▄▅▆▆

0,1
eval/AUC-ROC,0.99112
eval/Accuracy,0.93706
eval/F1-score,0.72727
eval/Precision,0.6
eval/Recall,0.92308
eval/loss,0.15896
eval/mcc_metric,0.71409
eval/runtime,0.1225
eval/samples_per_second,1167.116
eval/steps_per_second,40.808


[34m[1mwandb[0m: Agent Starting Run: q33we7xu with config:
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	lora_alpha: 128
[34m[1mwandb[0m: 	lr: 1.4145154504529838e-05
[34m[1mwandb[0m: 	r: 16


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-5M-MTR and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer= WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss,Mcc Metric,Accuracy,Auc-roc,Precision,Recall,F1-score
1,No log,0.646277,0.327912,0.664336,0.928994,0.20339,0.923077,0.333333
2,No log,0.606038,0.530595,0.853147,0.966272,0.375,0.923077,0.533333
3,0.641600,0.555754,0.579569,0.881119,0.97574,0.428571,0.923077,0.585366
4,0.641600,0.493843,0.593306,0.888112,0.978698,0.444444,0.923077,0.6
5,0.641600,0.424409,0.639064,0.909091,0.979882,0.5,0.923077,0.648649
6,0.507500,0.357092,0.693479,0.93007,0.981065,0.571429,0.923077,0.705882
7,0.507500,0.296266,0.693479,0.93007,0.984024,0.571429,0.923077,0.705882
8,0.337000,0.247358,0.693479,0.93007,0.985799,0.571429,0.923077,0.705882
9,0.337000,0.211979,0.693479,0.93007,0.988757,0.571429,0.923077,0.705882
10,0.337000,0.186486,0.736192,0.944056,0.990533,0.631579,0.923077,0.75


Model saved to ./clintox_models_5MTR/q33we7xu


0,1
eval/AUC-ROC,▁▅▆▇▇▇▇▇██████████
eval/Accuracy,▁▅▆▆▆▇▇▇▇▇▇███████
eval/F1-score,▁▄▄▄▅▆▆▆▆▆▆████▇▇▇
eval/Precision,▁▃▃▄▄▅▅▅▅▆▆▇██████
eval/Recall,███████████████▁▁▁
eval/loss,█▇▇▆▅▄▃▂▂▁▁▁▁▁▁▁▁▁
eval/mcc_metric,▁▄▄▄▅▆▆▆▆▆▆████▇▇▇
eval/runtime,▁▄▁▅▄▇▃▅█▆▅▆▅▅▅▁▃▄
eval/samples_per_second,█▅█▄▅▂▆▄▁▃▄▃▄▄▄█▆▅
eval/steps_per_second,█▅█▄▅▂▆▄▁▃▄▃▄▄▄█▆▅

0,1
eval/AUC-ROC,0.99112
eval/Accuracy,0.97203
eval/F1-score,0.84615
eval/Precision,0.84615
eval/Recall,0.84615
eval/loss,0.16799
eval/mcc_metric,0.83077
eval/runtime,0.1171
eval/samples_per_second,1220.796
eval/steps_per_second,42.685


[34m[1mwandb[0m: Agent Starting Run: 8mbsk0ip with config:
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	lora_alpha: 128
[34m[1mwandb[0m: 	lr: 1.4078246398358415e-05
[34m[1mwandb[0m: 	r: 32


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-5M-MTR and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer= WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss,Mcc Metric,Accuracy,Auc-roc,Precision,Recall,F1-score
1,No log,0.661186,0.352541,0.776224,0.847929,0.25641,0.769231,0.384615
2,No log,0.625229,0.553988,0.867133,0.933136,0.4,0.923077,0.55814
3,0.651600,0.581144,0.579569,0.881119,0.956213,0.428571,0.923077,0.585366
4,0.651600,0.522313,0.579569,0.881119,0.966864,0.428571,0.923077,0.585366
5,0.651600,0.453382,0.607752,0.895105,0.976331,0.461538,0.923077,0.615385
6,0.527600,0.38174,0.639064,0.909091,0.977515,0.5,0.923077,0.648649
7,0.527600,0.3123,0.6742,0.923077,0.982249,0.545455,0.923077,0.685714
8,0.341000,0.254942,0.693479,0.93007,0.984024,0.571429,0.923077,0.705882
9,0.341000,0.213846,0.693479,0.93007,0.986391,0.571429,0.923077,0.705882
10,0.341000,0.18641,0.714086,0.937063,0.986982,0.6,0.923077,0.727273


Model saved to ./clintox_models_5MTR/8mbsk0ip


0,1
eval/AUC-ROC,▁▅▆▇▇▇█████████████
eval/Accuracy,▁▄▅▅▆▆▇▇▇▇▇████████
eval/F1-score,▁▄▄▄▅▅▆▆▆▇▇████▇▇▇▇
eval/Precision,▁▃▄▄▄▅▆▆▆▆▇▇▇██████
eval/Recall,▁██████████████▅▅▅▅
eval/loss,█▇▇▆▅▄▃▂▂▁▁▁▁▁▁▁▁▁▁
eval/mcc_metric,▁▄▅▅▅▆▆▇▇▇▇████▇▇▇▇
eval/runtime,▆▄▂█▅▃▃▅▇▆▄▃▄▃▂▂▁▁▁
eval/samples_per_second,▃▅▇▁▄▅▆▄▂▃▅▆▅▆▇▇███
eval/steps_per_second,▃▅▇▁▄▅▆▄▂▃▅▆▅▆▇▇███

0,1
eval/AUC-ROC,0.98935
eval/Accuracy,0.95105
eval/F1-score,0.75862
eval/Precision,0.6875
eval/Recall,0.84615
eval/loss,0.16851
eval/mcc_metric,0.73659
eval/runtime,0.1157
eval/samples_per_second,1235.54
eval/steps_per_second,43.201


[34m[1mwandb[0m: Agent Starting Run: pycdsmqi with config:
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	lora_alpha: 128
[34m[1mwandb[0m: 	lr: 1.4764811831586508e-05
[34m[1mwandb[0m: 	r: 32


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-5M-MTR and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer= WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss,Mcc Metric,Accuracy,Auc-roc,Precision,Recall,F1-score
1,No log,0.650573,0.257447,0.48951,0.957396,0.151163,1.0,0.262626
2,No log,0.609546,0.498779,0.804196,0.972189,0.317073,1.0,0.481481
3,0.645900,0.556805,0.62698,0.888112,0.974556,0.448276,1.0,0.619048
4,0.645900,0.48698,0.622978,0.902098,0.977515,0.48,0.923077,0.631579
5,0.645900,0.405525,0.639064,0.909091,0.981657,0.5,0.923077,0.648649
6,0.496600,0.325156,0.656103,0.916084,0.980473,0.521739,0.923077,0.666667
7,0.496600,0.255542,0.6742,0.923077,0.98284,0.545455,0.923077,0.685714
8,0.292500,0.206902,0.693479,0.93007,0.984024,0.571429,0.923077,0.705882
9,0.292500,0.176737,0.693479,0.93007,0.987574,0.571429,0.923077,0.705882
10,0.292500,0.159454,0.714086,0.937063,0.989941,0.6,0.923077,0.727273


Model saved to ./clintox_models_5MTR/pycdsmqi


0,1
eval/AUC-ROC,▁▄▄▅▆▅▆▆▇▇██████████
eval/Accuracy,▁▆▇▇▇▇▇█████████████
eval/F1-score,▁▄▆▆▆▆▇▇▇▇▇▇▇███████
eval/Precision,▁▃▅▅▅▆▆▆▆▇▇▇▇███████
eval/Recall,███▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/loss,█▇▇▆▅▄▃▂▁▁▁▁▁▁▁▁▁▁▁▁
eval/mcc_metric,▁▄▆▆▆▆▇▇▇▇▇▇▇███████
eval/runtime,▂▃▁▂▂▂▂▂▁▂▂▅▂▂▂▁▁▃▁█
eval/samples_per_second,▇▆█▇▇▇▇▇█▇▇▄▇▇▇██▆█▁
eval/steps_per_second,▇▆█▇▇▇▇▇█▇▇▄▇▇▇██▆█▁

0,1
eval/AUC-ROC,0.99349
eval/Accuracy,0.95804
eval/F1-score,0.8
eval/Precision,0.70588
eval/Recall,0.92308
eval/loss,0.14731
eval/mcc_metric,0.78576
eval/runtime,0.1278
eval/samples_per_second,1118.781
eval/steps_per_second,39.118


[34m[1mwandb[0m: Agent Starting Run: mcnkpsf4 with config:
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	lora_alpha: 128
[34m[1mwandb[0m: 	lr: 1.4864696197692065e-05
[34m[1mwandb[0m: 	r: 16


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-5M-MTR and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer= WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss,Mcc Metric,Accuracy,Auc-roc,Precision,Recall,F1-score
1,No log,0.650465,0.394785,0.811189,0.921893,0.294118,0.769231,0.425532
2,No log,0.608283,0.509065,0.839161,0.959172,0.352941,0.923077,0.510638
3,0.640200,0.556142,0.593306,0.888112,0.966272,0.444444,0.923077,0.6
4,0.640200,0.488958,0.639064,0.909091,0.974556,0.5,0.923077,0.648649
5,0.640200,0.414685,0.639064,0.909091,0.976923,0.5,0.923077,0.648649
6,0.493900,0.341837,0.639064,0.909091,0.981657,0.5,0.923077,0.648649
7,0.493900,0.275648,0.639064,0.909091,0.983432,0.5,0.923077,0.648649
8,0.303500,0.225172,0.693479,0.93007,0.986982,0.571429,0.923077,0.705882
9,0.303500,0.191079,0.714086,0.937063,0.990533,0.6,0.923077,0.727273
10,0.303500,0.169976,0.736192,0.944056,0.991124,0.631579,0.923077,0.75


Model saved to ./clintox_models_5MTR/mcnkpsf4


0,1
eval/AUC-ROC,▁▅▅▆▇▇▇████████████
eval/Accuracy,▁▂▄▅▅▅▅▆▆▇▇█▇██████
eval/F1-score,▁▂▄▄▄▄▄▅▆▆▆█▇██████
eval/Precision,▁▂▃▃▃▃▃▄▄▅▅▇▇██████
eval/Recall,▁███████████▅▅▅▅▅▅▅
eval/loss,█▇▇▆▅▄▃▂▁▁▁▁▁▁▁▁▁▁▁
eval/mcc_metric,▁▃▄▅▅▅▅▅▆▆▆█▇██████
eval/runtime,▃▁▄▄▅▄▄▄▄▄▆▇█▄▆▂▃▄▄
eval/samples_per_second,▅█▅▅▄▅▅▅▅▅▃▂▁▄▃▇▆▅▅
eval/steps_per_second,▅█▅▅▄▅▅▅▅▅▃▂▁▄▃▇▆▅▅

0,1
eval/AUC-ROC,0.99172
eval/Accuracy,0.97902
eval/F1-score,0.88
eval/Precision,0.91667
eval/Recall,0.84615
eval/loss,0.17894
eval/mcc_metric,0.86936
eval/runtime,0.1172
eval/samples_per_second,1219.984
eval/steps_per_second,42.657


[34m[1mwandb[0m: Agent Starting Run: 0jhf6w1q with config:
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	lora_alpha: 128
[34m[1mwandb[0m: 	lr: 1.3869899364693637e-05
[34m[1mwandb[0m: 	r: 16


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-5M-MTR and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer= WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss,Mcc Metric,Accuracy,Auc-roc,Precision,Recall,F1-score
1,No log,0.653733,0.257447,0.48951,0.959172,0.151163,1.0,0.262626
2,No log,0.616599,0.451946,0.762238,0.977515,0.276596,1.0,0.433333
3,0.650400,0.571946,0.62698,0.888112,0.979882,0.448276,1.0,0.619048
4,0.650400,0.513417,0.687023,0.916084,0.981065,0.52,1.0,0.684211
5,0.650400,0.445117,0.687023,0.916084,0.983432,0.52,1.0,0.684211
6,0.521400,0.373625,0.704154,0.923077,0.984615,0.541667,1.0,0.702703
7,0.521400,0.303931,0.6742,0.923077,0.985207,0.545455,0.923077,0.685714
8,0.342900,0.245972,0.6742,0.923077,0.986982,0.545455,0.923077,0.685714
9,0.342900,0.203196,0.6742,0.923077,0.989941,0.545455,0.923077,0.685714
10,0.342900,0.173439,0.714086,0.937063,0.991716,0.6,0.923077,0.727273


Model saved to ./clintox_models_5MTR/0jhf6w1q


0,1
eval/AUC-ROC,▁▅▅▅▆▆▆▆▇▇██████████
eval/Accuracy,▁▅▇▇▇▇▇▇▇▇▇▇▇███████
eval/F1-score,▁▃▅▅▅▆▅▅▅▆▆▆▆▆▇▇▇▇██
eval/Precision,▁▂▄▄▄▅▅▅▅▅▅▅▅▆▆▆▇▇██
eval/Recall,██████▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/loss,██▇▆▅▄▃▃▂▂▁▁▁▁▁▁▁▁▁▁
eval/mcc_metric,▁▃▅▆▆▆▅▅▅▆▆▆▆▆▇▇▇▇██
eval/runtime,▁▃▆█▂▁▄▁▆▃▂▆▆▂▆▁▂▁▅▄
eval/samples_per_second,█▆▃▁▇█▅█▃▆▇▃▃█▃█▇█▄▅
eval/steps_per_second,█▆▃▁▇█▅█▃▆▇▃▃█▃█▇█▄▅

0,1
eval/AUC-ROC,0.99467
eval/Accuracy,0.98601
eval/F1-score,0.92308
eval/Precision,0.92308
eval/Recall,0.92308
eval/loss,0.1349
eval/mcc_metric,0.91538
eval/runtime,0.1181
eval/samples_per_second,1210.702
eval/steps_per_second,42.332


[34m[1mwandb[0m: Agent Starting Run: 8t48fjg0 with config:
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	lora_alpha: 128
[34m[1mwandb[0m: 	lr: 1.4105904731579142e-05
[34m[1mwandb[0m: 	r: 16


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-5M-MTR and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer= WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss,Mcc Metric,Accuracy,Auc-roc,Precision,Recall,F1-score
1,No log,0.652594,0.404145,0.818182,0.916568,0.30303,0.769231,0.434783
2,No log,0.613191,0.519615,0.846154,0.957396,0.363636,0.923077,0.521739
3,0.643000,0.565282,0.579569,0.881119,0.966864,0.428571,0.923077,0.585366
4,0.643000,0.503981,0.639064,0.909091,0.971006,0.5,0.923077,0.648649
5,0.643000,0.435484,0.639064,0.909091,0.976923,0.5,0.923077,0.648649
6,0.508700,0.366995,0.639064,0.909091,0.97929,0.5,0.923077,0.648649
7,0.508700,0.302012,0.639064,0.909091,0.982249,0.5,0.923077,0.648649
8,0.329200,0.248797,0.6742,0.923077,0.984024,0.545455,0.923077,0.685714
9,0.329200,0.209436,0.714086,0.937063,0.989349,0.6,0.923077,0.727273
10,0.329200,0.182966,0.714086,0.937063,0.990533,0.6,0.923077,0.727273


Model saved to ./clintox_models_5MTR/8t48fjg0


0,1
eval/AUC-ROC,▁▅▆▆▇▇▇▇████████████
eval/Accuracy,▁▂▄▅▅▅▅▆▆▆▆▇▇▇██████
eval/F1-score,▁▂▃▄▄▄▄▅▆▆▆▆▇▇██████
eval/Precision,▁▂▂▃▃▃▃▄▄▄▅▅▆▆██████
eval/Recall,▁████████████▅▅▅▅▅▅▅
eval/loss,█▇▇▆▅▄▃▂▂▁▁▁▁▁▁▁▁▁▁▁
eval/mcc_metric,▁▃▄▅▅▅▅▅▆▆▆▆▇▆██████
eval/runtime,▁▄▄▇▄▃▄▅▇▃█▃▆▇▇▄▂▅▄▂
eval/samples_per_second,█▅▄▂▅▆▅▄▂▆▁▆▃▂▂▄▆▄▄▇
eval/steps_per_second,█▅▄▂▅▆▅▄▂▆▁▆▃▂▂▄▆▄▄▇

0,1
eval/AUC-ROC,0.99112
eval/Accuracy,0.97902
eval/F1-score,0.88
eval/Precision,0.91667
eval/Recall,0.84615
eval/loss,0.16922
eval/mcc_metric,0.86936
eval/runtime,0.1167
eval/samples_per_second,1225.155
eval/steps_per_second,42.838


{'_runtime': 50.616286505, '_step': 27, '_timestamp': 1744274900.3053336, '_wandb': {'runtime': 51}, 'eval/AUC-ROC': 0.9911242603550297, 'eval/Accuracy': 0.9790209790209792, 'eval/F1-score': 0.88, 'eval/Precision': 0.9166666666666666, 'eval/Recall': 0.8461538461538461, 'eval/loss': 0.1692180186510086, 'eval/mcc_metric': 0.8693611470909237, 'eval/runtime': 0.1167, 'eval/samples_per_second': 1225.155, 'eval/steps_per_second': 42.838, 'total_flos': 72122343547128, 'train/epoch': 20, 'train/global_step': 760, 'train/grad_norm': 2.6312620639801025, 'train/learning_rate': 1.113624057756248e-06, 'train/loss': 0.1468, 'train_loss': 0.2880080223083496, 'train_runtime': 45.3928, 'train_samples_per_second': 522.109, 'train_steps_per_second': 16.743}


### Evaluation

In [14]:
# Load the model with a classification head
from transformers import AutoModelForSequenceClassification, AutoTokenizer

base_model = AutoModelForSequenceClassification.from_pretrained(
    "DeepChem/ChemBERTa-5M-MTR",
    num_labels=2,
    problem_type="single_label_classification",    
    trust_remote_code=True,
    
)

tokenizer_clin = AutoTokenizer.from_pretrained(
    "DeepChem/ChemBERTa-5M-MTR",
    trust_remote_code=True
)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-5M-MTR and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [39]:
from peft import PeftModel

adapter_model = PeftModel.from_pretrained(base_model, "/home/raghvendra2/Molformer_Finetuning/clintox_models_77mtr_re/ry12c9lj/checkpoint-600")



In [15]:
import pandas as pd

test_data_clin=pd.read_csv('/home/raghvendra2/Molformer_Finetuning/clintox_test.csv')

smiles_test_clin = test_data_clin['smiles'].tolist()

test_tokenized_clin =tokenizer_clin(smiles_test_clin)

test_dataset_clin = Dataset.from_dict(test_tokenized_clin)

test_labels_clin = test_data_clin['CT_TOX'].tolist() # Assuming tasks start from column 1


test_dataset_clin = test_dataset_clin.add_column("labels", test_labels_clin)


In [19]:
from evaluate import load
import numpy as np
from scipy.special import softmax
from sklearn.metrics import roc_auc_score, precision_score, recall_score, f1_score

accuracy_metric = load("accuracy")

def compute_metrics(eval_pred):
        logits, labels = eval_pred
        probabilities = softmax(logits, axis=1)[:, 1]  # Get probabilities for class 1
        predictions = np.argmax(logits, axis=1)  # Choose the most likely class
        mcc = matthews_corrcoef(labels, predictions)
        
        

        return {
            "eval_mcc_metric": mcc,
            "Accuracy": accuracy_metric.compute(predictions=predictions, references=labels)["accuracy"],
            "AUC-ROC": roc_auc_score(labels, probabilities),  # AUC-ROC requires probabilities
            "Precision": precision_score(labels, predictions),
            "Recall": recall_score(labels, predictions),
            "F1-score": f1_score(labels, predictions)
        }
    

In [17]:
training_args = TrainingArguments(
    output_dir="./test_results_clintox2",
    per_device_eval_batch_size=32,
    report_to="none",  # Disable logging to W&B for test
    seed=42,  # Ensures reproducibility

)


### For model tyfuunip 77MLM model

In [None]:
test_results_clin = trainer.evaluate()

print("Test Results for model :", test_results_clin)

prob:  [0.4401219  0.4399929  0.44660157 0.45053703 0.44525385 0.4580119
 0.46075583 0.46209157 0.5033227  0.45664516 0.4532183  0.45829377
 0.4509178  0.4523891  0.45217815 0.51477736 0.45290527 0.45750266
 0.45052534 0.45100486 0.45547527 0.45262635 0.46124303 0.4483394
 0.4528426  0.4561485  0.4517518  0.5295794  0.46028766 0.4568578
 0.46080524 0.45632154 0.42576033 0.43297708 0.43159267 0.44898543
 0.4636876  0.48366705 0.4761501  0.4557099  0.47625202 0.48437208
 0.4790405  0.4397568  0.44090763 0.4459546  0.44566756 0.44490114
 0.44374675 0.45518142 0.45188746 0.46135843 0.44551456 0.46952462
 0.4488264  0.4528965  0.43407086 0.44923997 0.45246115 0.44849825
 0.4377801  0.44890004 0.4528525  0.45194796 0.45177567 0.45127347
 0.44365412 0.43731663 0.43618667 0.4472212  0.4513229  0.45774305
 0.5249134  0.46220636 0.45378727 0.4513828  0.45773304 0.4541731
 0.45600662 0.4529879  0.45823517 0.45732948 0.45369816 0.4455454
 0.47273836 0.43125102 0.45793718 0.46125695 0.45517468 0.45

### For model 77M MLM 1m1rdktq , result similar to Molformer best model

In [34]:
test_results_clin = trainer.evaluate()

print("Test Results for model :", test_results_clin)

prob:  [2.2636575e-03 2.1273368e-03 9.4913028e-04 1.7895816e-03 1.2322032e-03
 2.2662901e-03 2.3632217e-03 2.2377539e-03 4.9768624e-01 1.2857310e-03
 9.4042718e-04 1.4716162e-03 8.0880936e-04 8.5863500e-04 7.8731659e-04
 9.9693120e-01 4.1663432e-03 9.9744927e-04 7.2505849e-04 7.9317833e-04
 8.2890649e-04 7.5041869e-04 1.1137150e-03 7.4008160e-04 7.7598670e-04
 1.2245920e-03 9.3314360e-04 9.9880087e-01 9.4802497e-04 8.9376868e-04
 1.0875771e-03 8.1587117e-04 1.2799466e-03 3.2281652e-03 1.7236605e-03
 7.7165634e-04 5.2515180e-03 3.7917960e-02 1.4309039e-02 6.1209425e-03
 1.0245502e-02 2.7188790e-01 3.3269875e-02 1.7050576e-03 1.8145476e-03
 2.6818756e-03 2.2128250e-03 2.2739777e-03 4.6588895e-03 2.6700431e-03
 1.2666737e-03 2.0346346e-03 9.2744321e-04 2.9990047e-03 1.4759921e-03
 1.8043651e-03 7.3090330e-04 9.7787206e-04 8.8727387e-04 1.0206160e-03
 1.6029282e-03 1.3530731e-03 1.5052761e-03 1.5822970e-03 1.3714175e-03
 2.1536497e-03 1.1692258e-03 8.6176163e-04 9.8773255e-04 6.5377005e-04

In [45]:
test_results_clin = trainer.evaluate()

print("Test Results for model :", test_results_clin)

prob:  [0.49189648 0.50437635 0.49710193 0.4827579  0.5103262  0.47776976
 0.5090503  0.5069377  0.48913968 0.5189021  0.5079457  0.49596068
 0.5211956  0.4894604  0.50843394 0.49846822 0.48656672 0.5055379
 0.48387793 0.4938234  0.50845313 0.504188   0.49817413 0.52219564
 0.52048296 0.4922291  0.53118414 0.47201216 0.5077607  0.5244536
 0.4820047  0.50981706 0.50513047 0.5067421  0.4950132  0.4992591
 0.47697246 0.5135397  0.49186352 0.48615742 0.47971562 0.46490225
 0.47868133 0.488366   0.486141   0.4672533  0.4727669  0.47844842
 0.4937883  0.48017076 0.51966035 0.49457708 0.4964692  0.5018369
 0.5183544  0.50759983 0.51033074 0.5088241  0.4970515  0.5005986
 0.5208653  0.5099847  0.49592677 0.50016934 0.4997075  0.50380987
 0.49675068 0.52640885 0.5133816  0.51943296 0.49700308 0.51433927
 0.49498564 0.5046982  0.4887951  0.51450986 0.5070639  0.49390793
 0.48778072 0.50806403 0.48961282 0.50639975 0.50782585 0.49060062
 0.49575263 0.5130115  0.5095936  0.48658034 0.51929677 0.50

### For 77MTR Model the training was poor, 0.50-0.60 precision

In [21]:
# List all checkpoints inside models directory
import os
from peft import PeftModel

models_dir = "./clintox_models_5MTR"

def find_all_checkpoints(base_dir):
    all_checkpoints = []
    for folder in os.listdir(base_dir):
        folder_path = os.path.join(base_dir, folder)
        if os.path.isdir(folder_path):
            for subfolder in os.listdir(folder_path):
                subfolder_path = os.path.join(folder_path, subfolder)
                if os.path.isdir(subfolder_path) and subfolder.startswith("checkpoint-"):
                    if os.path.exists(os.path.join(subfolder_path, "adapter_config.json")):
                        all_checkpoints.append(subfolder_path)
    return all_checkpoints

valid_checkpoints = find_all_checkpoints(models_dir)
print("🧠 Valid nested checkpoints found:", valid_checkpoints)

for checkpoint_path in valid_checkpoints:
    checkpoint_name = os.path.basename(checkpoint_path)
    parent_folder = os.path.basename(os.path.dirname(checkpoint_path))

    print(f"\n🔍 Evaluating model: {parent_folder}/{checkpoint_name}")

    adapter_model = PeftModel.from_pretrained(base_model, checkpoint_path)
    adapter_model.eval()

    trainer = WeightedLossTrainer(
        model=adapter_model,
        args=training_args,
        eval_dataset=test_dataset_clin,
        tokenizer=tokenizer_clin,
        compute_metrics=compute_metrics
    )


    test_results = trainer.evaluate()
    auc_score = test_results["eval_AUC-ROC"]
    

    if auc_score > 0.991:
        print(f"✅ AUC_ROC > 0.99 for {parent_folder}/{checkpoint_name}")
        print(f"📌 Test Results: {test_results}")
    else:
        print(f"❌ Skipping {parent_folder}/{checkpoint_name}")

    
    




  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


🧠 Valid nested checkpoints found: ['./clintox_models_5MTR/0jhf6w1q/checkpoint-722', './clintox_models_5MTR/0jhf6w1q/checkpoint-608', './clintox_models_5MTR/0jhf6w1q/checkpoint-684', './clintox_models_5MTR/0jhf6w1q/checkpoint-646', './clintox_models_5MTR/0jhf6w1q/checkpoint-760', './clintox_models_5MTR/pycdsmqi/checkpoint-722', './clintox_models_5MTR/pycdsmqi/checkpoint-608', './clintox_models_5MTR/pycdsmqi/checkpoint-684', './clintox_models_5MTR/pycdsmqi/checkpoint-646', './clintox_models_5MTR/pycdsmqi/checkpoint-760', './clintox_models_5MTR/8mbsk0ip/checkpoint-722', './clintox_models_5MTR/8mbsk0ip/checkpoint-532', './clintox_models_5MTR/8mbsk0ip/checkpoint-608', './clintox_models_5MTR/8mbsk0ip/checkpoint-684', './clintox_models_5MTR/8mbsk0ip/checkpoint-646', './clintox_models_5MTR/wlgpdy7b/checkpoint-570', './clintox_models_5MTR/wlgpdy7b/checkpoint-532', './clintox_models_5MTR/wlgpdy7b/checkpoint-608', './clintox_models_5MTR/wlgpdy7b/checkpoint-646', './clintox_models_5MTR/wlgpdy7b/ch

  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


✅ AUC_ROC > 0.99 for 0jhf6w1q/checkpoint-722
📌 Test Results: {'eval_mcc_metric': 0.9153846153846154, 'eval_loss': 0.13465836644172668, 'eval_model_preparation_time': 0.0042, 'eval_Accuracy': 0.986013986013986, 'eval_AUC-ROC': 0.9946745562130177, 'eval_Precision': 0.9230769230769231, 'eval_Recall': 0.9230769230769231, 'eval_F1-score': 0.9230769230769231, 'eval_runtime': 0.1747, 'eval_samples_per_second': 818.635, 'eval_steps_per_second': 28.624}

🔍 Evaluating model: 0jhf6w1q/checkpoint-608


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


✅ AUC_ROC > 0.99 for 0jhf6w1q/checkpoint-608
📌 Test Results: {'eval_mcc_metric': 0.785756725787005, 'eval_loss': 0.13363271951675415, 'eval_model_preparation_time': 0.004, 'eval_Accuracy': 0.958041958041958, 'eval_AUC-ROC': 0.9940828402366864, 'eval_Precision': 0.7058823529411765, 'eval_Recall': 0.9230769230769231, 'eval_F1-score': 0.8, 'eval_runtime': 0.1218, 'eval_samples_per_second': 1173.833, 'eval_steps_per_second': 41.043}

🔍 Evaluating model: 0jhf6w1q/checkpoint-684


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


✅ AUC_ROC > 0.99 for 0jhf6w1q/checkpoint-684
📌 Test Results: {'eval_mcc_metric': 0.8443747686898277, 'eval_loss': 0.13402533531188965, 'eval_model_preparation_time': 0.0039, 'eval_Accuracy': 0.972027972027972, 'eval_AUC-ROC': 0.9940828402366864, 'eval_Precision': 0.8, 'eval_Recall': 0.9230769230769231, 'eval_F1-score': 0.8571428571428571, 'eval_runtime': 0.1239, 'eval_samples_per_second': 1154.062, 'eval_steps_per_second': 40.352}

🔍 Evaluating model: 0jhf6w1q/checkpoint-646


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


✅ AUC_ROC > 0.99 for 0jhf6w1q/checkpoint-646
📌 Test Results: {'eval_mcc_metric': 0.8443747686898277, 'eval_loss': 0.13350766897201538, 'eval_model_preparation_time': 0.0042, 'eval_Accuracy': 0.972027972027972, 'eval_AUC-ROC': 0.9940828402366864, 'eval_Precision': 0.8, 'eval_Recall': 0.9230769230769231, 'eval_F1-score': 0.8571428571428571, 'eval_runtime': 0.1226, 'eval_samples_per_second': 1165.986, 'eval_steps_per_second': 40.769}

🔍 Evaluating model: 0jhf6w1q/checkpoint-760


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


✅ AUC_ROC > 0.99 for 0jhf6w1q/checkpoint-760
📌 Test Results: {'eval_mcc_metric': 0.9153846153846154, 'eval_loss': 0.13489842414855957, 'eval_model_preparation_time': 0.0041, 'eval_Accuracy': 0.986013986013986, 'eval_AUC-ROC': 0.9946745562130177, 'eval_Precision': 0.9230769230769231, 'eval_Recall': 0.9230769230769231, 'eval_F1-score': 0.9230769230769231, 'eval_runtime': 0.1227, 'eval_samples_per_second': 1165.771, 'eval_steps_per_second': 40.761}

🔍 Evaluating model: pycdsmqi/checkpoint-722


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


✅ AUC_ROC > 0.99 for pycdsmqi/checkpoint-722
📌 Test Results: {'eval_mcc_metric': 0.785756725787005, 'eval_loss': 0.1470877081155777, 'eval_model_preparation_time': 0.0043, 'eval_Accuracy': 0.958041958041958, 'eval_AUC-ROC': 0.9934911242603551, 'eval_Precision': 0.7058823529411765, 'eval_Recall': 0.9230769230769231, 'eval_F1-score': 0.8, 'eval_runtime': 0.1219, 'eval_samples_per_second': 1172.801, 'eval_steps_per_second': 41.007}

🔍 Evaluating model: pycdsmqi/checkpoint-608


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


✅ AUC_ROC > 0.99 for pycdsmqi/checkpoint-608
📌 Test Results: {'eval_mcc_metric': 0.785756725787005, 'eval_loss': 0.14588846266269684, 'eval_model_preparation_time': 0.004, 'eval_Accuracy': 0.958041958041958, 'eval_AUC-ROC': 0.9934911242603551, 'eval_Precision': 0.7058823529411765, 'eval_Recall': 0.9230769230769231, 'eval_F1-score': 0.8, 'eval_runtime': 0.1229, 'eval_samples_per_second': 1163.561, 'eval_steps_per_second': 40.684}

🔍 Evaluating model: pycdsmqi/checkpoint-684


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


✅ AUC_ROC > 0.99 for pycdsmqi/checkpoint-684
📌 Test Results: {'eval_mcc_metric': 0.785756725787005, 'eval_loss': 0.14644131064414978, 'eval_model_preparation_time': 0.004, 'eval_Accuracy': 0.958041958041958, 'eval_AUC-ROC': 0.9934911242603551, 'eval_Precision': 0.7058823529411765, 'eval_Recall': 0.9230769230769231, 'eval_F1-score': 0.8, 'eval_runtime': 0.1212, 'eval_samples_per_second': 1180.283, 'eval_steps_per_second': 41.269}

🔍 Evaluating model: pycdsmqi/checkpoint-646


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


✅ AUC_ROC > 0.99 for pycdsmqi/checkpoint-646
📌 Test Results: {'eval_mcc_metric': 0.785756725787005, 'eval_loss': 0.1458801031112671, 'eval_model_preparation_time': 0.004, 'eval_Accuracy': 0.958041958041958, 'eval_AUC-ROC': 0.9934911242603551, 'eval_Precision': 0.7058823529411765, 'eval_Recall': 0.9230769230769231, 'eval_F1-score': 0.8, 'eval_runtime': 0.1225, 'eval_samples_per_second': 1167.461, 'eval_steps_per_second': 40.82}

🔍 Evaluating model: pycdsmqi/checkpoint-760


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


✅ AUC_ROC > 0.99 for pycdsmqi/checkpoint-760
📌 Test Results: {'eval_mcc_metric': 0.785756725787005, 'eval_loss': 0.14731107652187347, 'eval_model_preparation_time': 0.004, 'eval_Accuracy': 0.958041958041958, 'eval_AUC-ROC': 0.9934911242603551, 'eval_Precision': 0.7058823529411765, 'eval_Recall': 0.9230769230769231, 'eval_F1-score': 0.8, 'eval_runtime': 0.1234, 'eval_samples_per_second': 1158.549, 'eval_steps_per_second': 40.509}

🔍 Evaluating model: 8mbsk0ip/checkpoint-722


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping 8mbsk0ip/checkpoint-722

🔍 Evaluating model: 8mbsk0ip/checkpoint-532


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping 8mbsk0ip/checkpoint-532

🔍 Evaluating model: 8mbsk0ip/checkpoint-608


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping 8mbsk0ip/checkpoint-608

🔍 Evaluating model: 8mbsk0ip/checkpoint-684


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping 8mbsk0ip/checkpoint-684

🔍 Evaluating model: 8mbsk0ip/checkpoint-646


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping 8mbsk0ip/checkpoint-646

🔍 Evaluating model: wlgpdy7b/checkpoint-570


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping wlgpdy7b/checkpoint-570

🔍 Evaluating model: wlgpdy7b/checkpoint-532


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping wlgpdy7b/checkpoint-532

🔍 Evaluating model: wlgpdy7b/checkpoint-608


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


✅ AUC_ROC > 0.99 for wlgpdy7b/checkpoint-608
📌 Test Results: {'eval_mcc_metric': 0.7140859186309388, 'eval_loss': 0.16212722659111023, 'eval_model_preparation_time': 0.0039, 'eval_Accuracy': 0.9370629370629371, 'eval_AUC-ROC': 0.9911242603550295, 'eval_Precision': 0.6, 'eval_Recall': 0.9230769230769231, 'eval_F1-score': 0.7272727272727273, 'eval_runtime': 0.1232, 'eval_samples_per_second': 1160.825, 'eval_steps_per_second': 40.588}

🔍 Evaluating model: wlgpdy7b/checkpoint-646


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


✅ AUC_ROC > 0.99 for wlgpdy7b/checkpoint-646
📌 Test Results: {'eval_mcc_metric': 0.7140859186309388, 'eval_loss': 0.15895630419254303, 'eval_model_preparation_time': 0.0039, 'eval_Accuracy': 0.9370629370629371, 'eval_AUC-ROC': 0.9911242603550295, 'eval_Precision': 0.6, 'eval_Recall': 0.9230769230769231, 'eval_F1-score': 0.7272727272727273, 'eval_runtime': 0.1255, 'eval_samples_per_second': 1139.373, 'eval_steps_per_second': 39.838}

🔍 Evaluating model: wlgpdy7b/checkpoint-456


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping wlgpdy7b/checkpoint-456

🔍 Evaluating model: q33we7xu/checkpoint-494


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping q33we7xu/checkpoint-494

🔍 Evaluating model: q33we7xu/checkpoint-570


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


✅ AUC_ROC > 0.99 for q33we7xu/checkpoint-570
📌 Test Results: {'eval_mcc_metric': 0.8780578588816019, 'eval_loss': 0.16391222178936005, 'eval_model_preparation_time': 0.0039, 'eval_Accuracy': 0.9790209790209791, 'eval_AUC-ROC': 0.9911242603550297, 'eval_Precision': 0.8571428571428571, 'eval_Recall': 0.9230769230769231, 'eval_F1-score': 0.8888888888888888, 'eval_runtime': 0.1268, 'eval_samples_per_second': 1127.423, 'eval_steps_per_second': 39.42}

🔍 Evaluating model: q33we7xu/checkpoint-608


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


✅ AUC_ROC > 0.99 for q33we7xu/checkpoint-608
📌 Test Results: {'eval_mcc_metric': 0.8307692307692308, 'eval_loss': 0.16611522436141968, 'eval_model_preparation_time': 0.0039, 'eval_Accuracy': 0.972027972027972, 'eval_AUC-ROC': 0.9911242603550297, 'eval_Precision': 0.8461538461538461, 'eval_Recall': 0.8461538461538461, 'eval_F1-score': 0.8461538461538461, 'eval_runtime': 0.1302, 'eval_samples_per_second': 1098.281, 'eval_steps_per_second': 38.401}

🔍 Evaluating model: q33we7xu/checkpoint-684


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


✅ AUC_ROC > 0.99 for q33we7xu/checkpoint-684
📌 Test Results: {'eval_mcc_metric': 0.8307692307692308, 'eval_loss': 0.16798891127109528, 'eval_model_preparation_time': 0.0039, 'eval_Accuracy': 0.972027972027972, 'eval_AUC-ROC': 0.9911242603550297, 'eval_Precision': 0.8461538461538461, 'eval_Recall': 0.8461538461538461, 'eval_F1-score': 0.8461538461538461, 'eval_runtime': 0.1315, 'eval_samples_per_second': 1087.833, 'eval_steps_per_second': 38.036}

🔍 Evaluating model: q33we7xu/checkpoint-646


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


✅ AUC_ROC > 0.99 for q33we7xu/checkpoint-646
📌 Test Results: {'eval_mcc_metric': 0.8307692307692308, 'eval_loss': 0.16694866120815277, 'eval_model_preparation_time': 0.004, 'eval_Accuracy': 0.972027972027972, 'eval_AUC-ROC': 0.9911242603550297, 'eval_Precision': 0.8461538461538461, 'eval_Recall': 0.8461538461538461, 'eval_F1-score': 0.8461538461538461, 'eval_runtime': 0.1276, 'eval_samples_per_second': 1120.634, 'eval_steps_per_second': 39.183}

🔍 Evaluating model: 8nooftvw/checkpoint-570


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping 8nooftvw/checkpoint-570

🔍 Evaluating model: 8nooftvw/checkpoint-532


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping 8nooftvw/checkpoint-532

🔍 Evaluating model: 8nooftvw/checkpoint-608


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping 8nooftvw/checkpoint-608

🔍 Evaluating model: 8nooftvw/checkpoint-646


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping 8nooftvw/checkpoint-646

🔍 Evaluating model: 8nooftvw/checkpoint-456


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping 8nooftvw/checkpoint-456

🔍 Evaluating model: vthdn47v/checkpoint-722


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping vthdn47v/checkpoint-722

🔍 Evaluating model: vthdn47v/checkpoint-608


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping vthdn47v/checkpoint-608

🔍 Evaluating model: vthdn47v/checkpoint-684


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping vthdn47v/checkpoint-684

🔍 Evaluating model: vthdn47v/checkpoint-646


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping vthdn47v/checkpoint-646

🔍 Evaluating model: vthdn47v/checkpoint-760


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping vthdn47v/checkpoint-760

🔍 Evaluating model: 8t48fjg0/checkpoint-722


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


✅ AUC_ROC > 0.99 for 8t48fjg0/checkpoint-722
📌 Test Results: {'eval_mcc_metric': 0.8693611470909237, 'eval_loss': 0.16874969005584717, 'eval_model_preparation_time': 0.0039, 'eval_Accuracy': 0.9790209790209791, 'eval_AUC-ROC': 0.9911242603550295, 'eval_Precision': 0.9166666666666666, 'eval_Recall': 0.8461538461538461, 'eval_F1-score': 0.88, 'eval_runtime': 0.1187, 'eval_samples_per_second': 1204.865, 'eval_steps_per_second': 42.128}

🔍 Evaluating model: 8t48fjg0/checkpoint-570


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping 8t48fjg0/checkpoint-570

🔍 Evaluating model: 8t48fjg0/checkpoint-684


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


✅ AUC_ROC > 0.99 for 8t48fjg0/checkpoint-684
📌 Test Results: {'eval_mcc_metric': 0.8693611470909237, 'eval_loss': 0.16767965257167816, 'eval_model_preparation_time': 0.0039, 'eval_Accuracy': 0.9790209790209791, 'eval_AUC-ROC': 0.9911242603550295, 'eval_Precision': 0.9166666666666666, 'eval_Recall': 0.8461538461538461, 'eval_F1-score': 0.88, 'eval_runtime': 0.1175, 'eval_samples_per_second': 1217.208, 'eval_steps_per_second': 42.56}

🔍 Evaluating model: 8t48fjg0/checkpoint-646


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


✅ AUC_ROC > 0.99 for 8t48fjg0/checkpoint-646
📌 Test Results: {'eval_mcc_metric': 0.8693611470909237, 'eval_loss': 0.16638915240764618, 'eval_model_preparation_time': 0.0039, 'eval_Accuracy': 0.9790209790209791, 'eval_AUC-ROC': 0.9911242603550295, 'eval_Precision': 0.9166666666666666, 'eval_Recall': 0.8461538461538461, 'eval_F1-score': 0.88, 'eval_runtime': 0.1165, 'eval_samples_per_second': 1227.979, 'eval_steps_per_second': 42.936}

🔍 Evaluating model: 8t48fjg0/checkpoint-760


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


✅ AUC_ROC > 0.99 for 8t48fjg0/checkpoint-760
📌 Test Results: {'eval_mcc_metric': 0.8693611470909237, 'eval_loss': 0.1692180186510086, 'eval_model_preparation_time': 0.0038, 'eval_Accuracy': 0.9790209790209791, 'eval_AUC-ROC': 0.9911242603550295, 'eval_Precision': 0.9166666666666666, 'eval_Recall': 0.8461538461538461, 'eval_F1-score': 0.88, 'eval_runtime': 0.1193, 'eval_samples_per_second': 1198.461, 'eval_steps_per_second': 41.904}

🔍 Evaluating model: mcnkpsf4/checkpoint-722


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


✅ AUC_ROC > 0.99 for mcnkpsf4/checkpoint-722
📌 Test Results: {'eval_mcc_metric': 0.8693611470909237, 'eval_loss': 0.17893646657466888, 'eval_model_preparation_time': 0.0039, 'eval_Accuracy': 0.9790209790209791, 'eval_AUC-ROC': 0.991715976331361, 'eval_Precision': 0.9166666666666666, 'eval_Recall': 0.8461538461538461, 'eval_F1-score': 0.88, 'eval_runtime': 0.1179, 'eval_samples_per_second': 1212.964, 'eval_steps_per_second': 42.411}

🔍 Evaluating model: mcnkpsf4/checkpoint-532


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


✅ AUC_ROC > 0.99 for mcnkpsf4/checkpoint-532
📌 Test Results: {'eval_mcc_metric': 0.8693611470909237, 'eval_loss': 0.1629505604505539, 'eval_model_preparation_time': 0.0038, 'eval_Accuracy': 0.9790209790209791, 'eval_AUC-ROC': 0.9911242603550295, 'eval_Precision': 0.9166666666666666, 'eval_Recall': 0.8461538461538461, 'eval_F1-score': 0.88, 'eval_runtime': 0.1186, 'eval_samples_per_second': 1206.229, 'eval_steps_per_second': 42.176}

🔍 Evaluating model: mcnkpsf4/checkpoint-608


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


✅ AUC_ROC > 0.99 for mcnkpsf4/checkpoint-608
📌 Test Results: {'eval_mcc_metric': 0.8693611470909237, 'eval_loss': 0.1741529256105423, 'eval_model_preparation_time': 0.0038, 'eval_Accuracy': 0.9790209790209791, 'eval_AUC-ROC': 0.9911242603550295, 'eval_Precision': 0.9166666666666666, 'eval_Recall': 0.8461538461538461, 'eval_F1-score': 0.88, 'eval_runtime': 0.1289, 'eval_samples_per_second': 1109.274, 'eval_steps_per_second': 38.786}

🔍 Evaluating model: mcnkpsf4/checkpoint-684


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


✅ AUC_ROC > 0.99 for mcnkpsf4/checkpoint-684
📌 Test Results: {'eval_mcc_metric': 0.8693611470909237, 'eval_loss': 0.1775006651878357, 'eval_model_preparation_time': 0.0038, 'eval_Accuracy': 0.9790209790209791, 'eval_AUC-ROC': 0.991715976331361, 'eval_Precision': 0.9166666666666666, 'eval_Recall': 0.8461538461538461, 'eval_F1-score': 0.88, 'eval_runtime': 0.1195, 'eval_samples_per_second': 1196.799, 'eval_steps_per_second': 41.846}

🔍 Evaluating model: mcnkpsf4/checkpoint-646


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


✅ AUC_ROC > 0.99 for mcnkpsf4/checkpoint-646
📌 Test Results: {'eval_mcc_metric': 0.8693611470909237, 'eval_loss': 0.17545665800571442, 'eval_model_preparation_time': 0.0039, 'eval_Accuracy': 0.9790209790209791, 'eval_AUC-ROC': 0.991715976331361, 'eval_Precision': 0.9166666666666666, 'eval_Recall': 0.8461538461538461, 'eval_F1-score': 0.88, 'eval_runtime': 0.1169, 'eval_samples_per_second': 1223.64, 'eval_steps_per_second': 42.785}

🔍 Evaluating model: mc7f44lm/checkpoint-494


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping mc7f44lm/checkpoint-494

🔍 Evaluating model: mc7f44lm/checkpoint-304


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping mc7f44lm/checkpoint-304

🔍 Evaluating model: mc7f44lm/checkpoint-456


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping mc7f44lm/checkpoint-456

🔍 Evaluating model: mc7f44lm/checkpoint-380


  trainer = WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


❌ Skipping mc7f44lm/checkpoint-380

🔍 Evaluating model: mc7f44lm/checkpoint-418


❌ Skipping mc7f44lm/checkpoint-418


### Best model for clintox chem 77M MTR:u5fiiru1/checkpoint-228

📌 Test Results: {'eval_mcc_metric': 0.7416198487095662, 'eval_loss': 0.5224099159240723, 'eval_Accuracy': 0.9370629370629371, 'eval_AUC-ROC': 0.9875739644970415, 'eval_Precision': 0.5909090909090909, 'eval_Recall': 1.0, 'eval_F1-score': 0.7428571428571429, 'eval_runtime': 0.2582, 'eval_samples_per_second': 553.851, 'eval_steps_per_second': 11.619}

### Best model for clintox chem 10M MLM : 3j0ob8j9/checkpoint-266

📌 Test Results: {'eval_mcc_metric': 0.8693611470909237, 'eval_loss': 0.18695949018001556, 'eval_Accuracy': 0.9790209790209791, 'eval_AUC-ROC': 0.9881656804733728, 'eval_Precision': 0.9166666666666666, 'eval_Recall': 0.8461538461538461, 'eval_F1-score': 0.88, 'eval_runtime': 0.7212, 'eval_samples_per_second': 198.273, 'eval_steps_per_second': 4.16}

### Best model for clintox chem 10M MTR: r98v7m7c/checkpoint-152

📌 Test Results: {'eval_mcc_metric': 0.7041543391425868, 'eval_loss': 0.5360366702079773, 'eval_model_preparation_time': 0.0039, 'eval_Accuracy': 0.9230769230769231, 'eval_AUC-ROC': 0.9852071005917159, 'eval_Precision': 0.5416666666666666, 'eval_Recall': 1.0, 'eval_F1-score': 0.7027027027027027, 'eval_runtime': 0.1188, 'eval_samples_per_second': 1204.016, 'eval_steps_per_second': 42.098}

### Best model for clin chem 5M MTR: mcnkpsf4/checkpoint-646
📌 Test Results: {'eval_mcc_metric': 0.8693611470909237, 'eval_loss': 0.17545665800571442, 'eval_model_preparation_time': 0.0039, 'eval_Accuracy': 0.9790209790209791, 'eval_AUC-ROC': 0.991715976331361, 'eval_Precision': 0.9166666666666666, 'eval_Recall': 0.8461538461538461, 'eval_F1-score': 0.88, 'eval_runtime': 0.1169, 'eval_samples_per_second': 1223.64, 'eval_steps_per_second': 42.785}



## Load and Merge the best model

In [22]:
base_model = AutoModelForSequenceClassification.from_pretrained(
    'DeepChem/ChemBERTa-5M-MTR',
    num_labels=2,
    problem_type="single_label_classification",    
    trust_remote_code=True,
    
)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-5M-MTR and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [23]:
from peft import PeftModel  

adapter_model = PeftModel.from_pretrained(base_model, "/home/raghvendra2/Molformer_Finetuning/clintox_models_5MTR/mcnkpsf4/checkpoint-646")



In [24]:
final_model_clintox_chemberta_5m_MTR= adapter_model.merge_and_unload()

### Save model to Chemberta finetuned model lora 100M MTR

In [25]:
save_path = "/home/raghvendra2/Molformer_Finetuning/Clintox_Final_chemberta_5m_mtr_model"

final_model_clintox_chemberta_5m_MTR.save_pretrained(save_path)

