## Finetuning Chemberta for FLAVOR Dataset Property Prediction Task using LoRA Technique


In [1]:
#Importing Libraries

import evaluate
from scipy.special import softmax
from sklearn.metrics import roc_auc_score, precision_score, recall_score, f1_score,matthews_corrcoef
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from datasets import Dataset
from datasets import load_dataset
import datasets
from peft import LoraConfig, get_peft_model
import torch
import torch.nn.functional as F
from sklearn.preprocessing import LabelEncoder
from transformers import Trainer
from transformers import TrainingArguments, EarlyStoppingCallback
import pandas as pd
import numpy as np 

  from .autonotebook import tqdm as notebook_tqdm


### Loading the Datasets

In [66]:
train_flav=pd.read_csv('./fart_train.csv')
train_flav.reset_index(drop=True, inplace=True)
val_flav=pd.read_csv('./fart_val.csv')
val_flav.reset_index(drop=True, inplace=True)
test_flav=pd.read_csv('./fart_test.csv')
test_flav.reset_index(drop=True, inplace=True)

In [67]:
train_flav.drop('Unnamed: 0',axis=1, inplace=True)
val_flav.drop('Unnamed: 0',axis=1, inplace=True)
test_flav.drop('Unnamed: 0',axis=1, inplace=True)

## Chemberta Model Sited in FART Paper

In [14]:
model_checkpoint = "seyonec/SMILES_tokenized_PubChem_shard00_160k"  


tokenizer = AutoTokenizer.from_pretrained(model_checkpoint,trust_remote_code=True)
model = AutoModelForSequenceClassification.from_pretrained(
    model_checkpoint,
    num_labels=5,
    trust_remote_code=True
)


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at seyonec/SMILES_tokenized_PubChem_shard00_160k and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


## Other Chemberta Models 

## Model 77M MLM model

In [26]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForMaskedLM

tokenizer = AutoTokenizer.from_pretrained("DeepChem/ChemBERTa-77M-MLM")
model = AutoModelForSequenceClassification.from_pretrained(
    "DeepChem/ChemBERTa-77M-MLM",
    num_labels=5,
    trust_remote_code=True
)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


## Model 10M MLM model

In [53]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForMaskedLM

tokenizer = AutoTokenizer.from_pretrained("DeepChem/ChemBERTa-10M-MLM")
model = AutoModelForSequenceClassification.from_pretrained(
    "DeepChem/ChemBERTa-10M-MLM",
    num_labels=5,
    trust_remote_code=True
)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-10M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


## Model 10M MTR Model

In [17]:
# Load model directly
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("DeepChem/ChemBERTa-10M-MTR")
model = AutoModelForSequenceClassification.from_pretrained(
    "DeepChem/ChemBERTa-10M-MTR",
    num_labels=5,
    trust_remote_code=True
)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-10M-MTR and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


## Model 77M MTR Model

In [68]:
# Load model directly
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("DeepChem/ChemBERTa-77M-MTR")
model = AutoModelForSequenceClassification.from_pretrained(
    "DeepChem/ChemBERTa-77M-MTR",
    num_labels=5,
    trust_remote_code=True
)


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MTR and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


## Model 5M MTR Model

In [19]:
## Model 5M MTR Model

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("DeepChem/ChemBERTa-5M-MTR")
model = AutoModelForSequenceClassification.from_pretrained(
    "DeepChem/ChemBERTa-5M-MTR",
    num_labels=5,
    trust_remote_code=True
)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-5M-MTR and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


## Preparing Dataset

In [69]:
train_dataset = Dataset.from_pandas(train_flav)
val_dataset = Dataset.from_pandas(val_flav)
test_dataset = Dataset.from_pandas(test_flav)


In [70]:
def tokenize_function(examples):

    return tokenizer(examples["Canonicalized SMILES"], padding="max_length", truncation=True, max_length=512)


train_dataset = train_dataset.map(tokenize_function, batched=True)

val_dataset = val_dataset.map(tokenize_function, batched=True)

test_dataset = test_dataset.map(tokenize_function, batched=True)

Map: 100%|██████████| 10517/10517 [00:03<00:00, 3401.16 examples/s]
Map: 100%|██████████| 2254/2254 [00:00<00:00, 3880.74 examples/s]
Map: 100%|██████████| 2254/2254 [00:00<00:00, 3882.58 examples/s]


In [71]:
label_encoder = LabelEncoder() 

In [72]:

encoded_labels = label_encoder.fit_transform(train_dataset['Canonicalized Taste'])

train_dataset = train_dataset.add_column('label', encoded_labels)

encoded_labels = label_encoder.fit_transform(val_dataset['Canonicalized Taste'])

val_dataset = val_dataset.add_column('label', encoded_labels)

encoded_labels = label_encoder.fit_transform(test_dataset['Canonicalized Taste'])

test_dataset = test_dataset.add_column('label', encoded_labels) 

In [73]:
columns_to_remove = ["Canonicalized SMILES", "Standardized SMILES", 
                     "Canonicalized Taste", "Original Labels", "Source", "is_multiclass"]

# Remove columns from all datasets
train_dataset = train_dataset.remove_columns(columns_to_remove)
val_dataset = val_dataset.remove_columns(columns_to_remove)
test_dataset = test_dataset.remove_columns(columns_to_remove) 

### Compute Metrics


In [32]:
metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):

    logits, labels = eval_pred

    predictions = np.argmax(logits, axis=-1)
    probabilities= softmax(logits, axis=1)
    mcc = matthews_corrcoef(labels, predictions)

        
    return {
            "eval_mcc_metric": mcc,
            "Accuracy": metric.compute(predictions=predictions, references=labels)["accuracy"],
            "AUC-ROC": roc_auc_score(labels, probabilities,multi_class="ovr"),  # AUC-ROC requires probabilities
            "Precision": precision_score(labels, predictions,average="macro"),
            "Recall": recall_score(labels, predictions,average="macro"),
            "F1-score": f1_score(labels, predictions,average="macro")
        }

### define LoRA Config

In [11]:

lora_config = LoraConfig(
    task_type="SEQ_CLS",  # Sequence classification task
    r=8,  # Rank of LoRA matrices
    lora_alpha=16,  # Scaling factor double of rank( from the rule of thumb)
    target_modules='all-linear',
    lora_dropout=0.1,  
    #init_lora_weights="gaussian"
)

model = get_peft_model(model, lora_config)

### Define Custom Trainer

In [33]:
#focal loss computation

def focal_loss_multiclass(inputs, targets, alpha=1, gamma=2):
    log_prob = F.log_softmax(inputs, dim=-1)
    prob = torch.exp(log_prob)  # Convert log probabilities back to normal probabilities

    targets_one_hot = F.one_hot(targets, num_classes=inputs.shape[-1])
    pt = torch.sum(prob * targets_one_hot, dim=-1)  # Get probability of the true class

    focal_loss = -alpha * (1 - pt) ** gamma * torch.sum(log_prob * targets_one_hot, dim=-1)
    
    return focal_loss.mean() 

In [34]:

class CustomTrainer(Trainer):
    
    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.logits

        loss = focal_loss_multiclass(logits, labels)
        
        return (loss, outputs) if return_outputs else loss 

### Train The model

In [14]:

training_args = TrainingArguments(
    output_dir="./results_flavor_chemberta_100m_mtr_lora",
    evaluation_strategy="steps",
    learning_rate=1e-5,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    num_train_epochs=10,
    weight_decay=0.01,
    save_strategy="steps",
    logging_dir="./logs_flavor_chemberta",
    logging_strategy="steps",
    logging_steps=100,
    save_total_limit=10,
    remove_unused_columns=False,
    metric_for_best_model='eval_mcc_metric',
    greater_is_better=True,  
    load_best_model_at_end=True,

         
)



In [15]:
trainer = CustomTrainer(

    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]
    
)

No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


## Hyperparameter tuning using WandB

In [2]:
import wandb
import os

In [23]:
wandb.login()



True

In [3]:
def data_load():
    train_flav=pd.read_csv('/home/raghvendra2/Molformer_Finetuning/fart_train.csv')
    val_flav=pd.read_csv('/home/raghvendra2/Molformer_Finetuning/fart_val.csv')
    train_flav.drop('Unnamed: 0',axis=1, inplace=True)
    val_flav.drop('Unnamed: 0',axis=1, inplace=True)

    return train_flav, val_flav  

In [4]:
def data_prep(data_process):

    dataset = Dataset.from_pandas(data_process)
       

    return dataset

def tokenize_function(examples,tokenizer):

    return tokenizer(examples["Canonicalized SMILES"], padding="max_length", truncation=True, max_length=512)


In [5]:
def label_encoding(dataset):

    label_encoder = LabelEncoder()

    encoded_labels = label_encoder.fit_transform(dataset['Canonicalized Taste'])

    dataset = dataset.add_column('label', encoded_labels)
    
    columns_to_remove = ["Canonicalized SMILES", "Standardized SMILES", 
                     "Canonicalized Taste", "Original Labels", "Source", "is_multiclass"]


    dataset = dataset.remove_columns(columns_to_remove)

    return dataset

In [6]:
from peft import LoraConfig, get_peft_model

def lora_config(r, lora_alpha, dropout):

    lora_config = LoraConfig(
        task_type="SEQ_CLS",  # Sequence classification task
        r=r,  # Rank of LoRA matrices
        lora_alpha=lora_alpha,  # Scaling factor double of rank( from the rule of thumb)
        target_modules='all-linear',
        lora_dropout=dropout  # Dropout rate
        #init_lora_weights="gaussian"
    )

    return lora_config

In [7]:

def focal_loss_multiclass(inputs, targets, alpha=1, gamma=2):
    log_prob = F.log_softmax(inputs, dim=-1)
    prob = torch.exp(log_prob)  # Convert log probabilities back to normal probabilities

    targets_one_hot = F.one_hot(targets, num_classes=inputs.shape[-1])
    pt = torch.sum(prob * targets_one_hot, dim=-1)  # Get probability of the true class

    focal_loss = -alpha * (1 - pt) ** gamma * torch.sum(log_prob * targets_one_hot, dim=-1)
    
    return focal_loss.mean()

In [8]:
class CustomTrainer(Trainer):
    
    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.logits

        loss = focal_loss_multiclass(logits, labels)
        
        return (loss, outputs) if return_outputs else loss

## 100M MTR Model hyperparameter tuning

In [None]:

def run_training():

    run = wandb.init(project="flavor analysis chemberta Hyperparameter Tuning")
    config = run.config


    save_dir = f"./models_Mlm_10_Chemberta/{wandb.run.id}"  # Unique directory for each run
    os.makedirs(save_dir, exist_ok=True)

   
    tokenizer = AutoTokenizer.from_pretrained("DeepChem/ChemBERTa-10M-MLM")


    train_data, val_data=data_load()
    training_data=data_prep(train_data)
    validation_data=data_prep(val_data)    
    training_data=training_data.map(lambda x: tokenize_function(x, tokenizer), batched=True)
    validation_data=validation_data.map(lambda x: tokenize_function(x, tokenizer), batched=True)


    training_data=label_encoding(training_data)
    validation_data=label_encoding(validation_data)


    # Load the model with a classification head

    
    model = AutoModelForSequenceClassification.from_pretrained(
    "DeepChem/ChemBERTa-10M-MLM",                                 # Define any other model here
    num_labels=5,
    trust_remote_code=True
    )

    peft_config = lora_config(config.r, config.lora_alpha, config.dropout)
    lora_model = get_peft_model(model, peft_config)
    lora_model.print_trainable_parameters()

    training_args = TrainingArguments(
    output_dir=save_dir,
    evaluation_strategy="steps",
    learning_rate=config.lr,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    num_train_epochs=5,
    weight_decay=0.01,
    save_strategy="steps",
    logging_dir="./logs_flavor_chem_wandb",
    logging_strategy="steps",
    logging_steps=100,
    report_to="wandb",
    load_best_model_at_end=True,
    metric_for_best_model="eval_mcc_metric",
    greater_is_better=True,
    remove_unused_columns=False,

    )


    metric = evaluate.load("accuracy")

    def compute_metrics(eval_pred):

        logits, labels = eval_pred

        predictions = np.argmax(logits, axis=-1)
        probabilities= softmax(logits, axis=1)
        mcc = matthews_corrcoef(labels, predictions)

            
        return {
                "eval_mcc_metric": mcc,
                "Accuracy": metric.compute(predictions=predictions, references=labels)["accuracy"],
                "AUC-ROC": roc_auc_score(labels, probabilities,multi_class="ovr"),  # AUC-ROC requires probabilities
                "Precision": precision_score(labels, predictions,average="macro"),
                "Recall": recall_score(labels, predictions,average="macro"),
                "F1-score": f1_score(labels, predictions,average="macro")
            }


    trainer_flavor = CustomTrainer(
    model=lora_model,
    args=training_args,
    train_dataset=training_data,
    eval_dataset= validation_data,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=5)]
    )

    trainer_flavor.train()
    trainer_flavor.save_model(save_dir)
        
    print(f"Model saved to {save_dir}")

    wandb.finish()

    

### To check best params for more number of epochs

In [None]:
trainer_flavor.train()
trainer_flavor.save_model('./manual_config_chemberta')

### Defining Parameter

In [None]:
def main():

    sweep_config = {
    "name": "Flavor Hyperparameter Tuning",
    "method": "bayes",
    "metric": {
        "goal": "maximize", 
        "name": "eval/mcc_metric"
    },
    "parameters": {
        "lr": {
            "distribution": "uniform",
            "min": 1e-5,  
            "max": 2e-3
        },
        "r": {
            "values": [4,8,16,32,64, 128]
        },
        "lora_alpha": {
            "values": [4,8,16,32,64,128]
        },
        "dropout": {
            "values": [0.0,0.1,0.2]
        },
        
        "optimizer": {
            "value": ["adamw"]
        }
    }
}

    sweep_id = wandb.sweep(sweep_config, project="huggingface")
    wandb.agent(sweep_id, function=run_training, count=5)

    api = wandb.Api()
    sweep = api.sweep(f"huggingface/{sweep_id}")
    print(sweep.runs[0].summary_metrics)

    runs_with_rmse = [run for run in sweep.runs if 'eval/mcc_metric' in run.summary_metrics]
    if runs_with_rmse:
        # Sort by rmse in descending order (maximize)
        best_run = sorted(runs_with_rmse, key=lambda run: run.summary_metrics['eval/mcc_metric'])[0]
    else:
        raise ValueError("No runs found with 'eval/mcc_metric' metric.")

    best_hyperparameters = best_run.config
    print(f"Best hyperparameters: {best_hyperparameters}")
    

if __name__ == "__main__":
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    main()

Create sweep with ID: p5bx31ga
Sweep URL: https://wandb.ai/harodharsha21-iit-ropar/huggingface/sweeps/p5bx31ga


[34m[1mwandb[0m: Agent Starting Run: oggmqthu with config:
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	lora_alpha: 128
[34m[1mwandb[0m: 	lr: 0.0017052657096512777
[34m[1mwandb[0m: 	optimizer: ['adamw']
[34m[1mwandb[0m: 	r: 128
[34m[1mwandb[0m: Currently logged in as: [33mharodharsha21[0m ([33mharodharsha21-iit-ropar[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Map: 100%|██████████| 10517/10517 [00:02<00:00, 3773.50 examples/s]
Map: 100%|██████████| 2254/2254 [00:00<00:00, 3939.05 examples/s]
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MTR and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 2,128,773 || all params: 5,706,234 || trainable%: 37.3061


  trainer_flavor = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss,Validation Loss,Mcc Metric,Accuracy,Auc-roc,Precision,Recall,F1-score
100,0.3721,0.223117,0.689075,0.842059,0.926388,0.631765,0.583832,0.59418
200,0.312,0.213882,0.710414,0.848713,0.935786,0.617623,0.610957,0.60741
300,0.302,0.200733,0.735789,0.856256,0.94515,0.689243,0.776865,0.723069
400,0.2662,0.191977,0.732917,0.858474,0.957703,0.625368,0.642933,0.633809
500,0.2625,0.190028,0.735321,0.858917,0.959356,0.829013,0.776665,0.795119
600,0.2313,0.178072,0.751707,0.865129,0.959807,0.660544,0.788225,0.681024
700,0.2157,0.150595,0.764945,0.880213,0.971509,0.799686,0.776359,0.785588
800,0.2264,0.158628,0.752306,0.870453,0.952124,0.654271,0.647802,0.646536
900,0.2294,0.145187,0.771493,0.8811,0.970972,0.789703,0.793413,0.791301
1000,0.2102,0.142044,0.75525,0.87134,0.972498,0.844589,0.714996,0.74571


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Model saved to ./models_MTR100_Chemberta/oggmqthu


0,1
eval/AUC-ROC,▁▂▄▅▆▆▇▅▇▇██████
eval/Accuracy,▁▂▃▃▄▄▇▅▇▅▇▇▆▇▇█
eval/F1-score,▁▁▅▂▇▃▆▃▇▅█▅▆█▇▇
eval/Precision,▁▁▃▁█▂▇▂▆█▇▄▄▇▆▇
eval/Recall,▁▂▆▂▆▆▆▃▆▄█▆███▆
eval/loss,█▇▆▆▆▅▃▃▃▂▂▂▁▂▁▁
eval/mcc_metric,▁▃▄▄▄▅▆▆▇▆▇▇▇███
eval/runtime,▂▁▁▁▇▆█▄▂▄▄▂▁▂▂▁
eval/samples_per_second,▇███▂▃▁▅▇▅▅▇█▇▇█
eval/steps_per_second,▇███▂▃▁▅▇▅▅▇█▇▇█

0,1
eval/AUC-ROC,0.97711
eval/Accuracy,0.8882
eval/F1-score,0.80249
eval/Precision,0.80054
eval/Recall,0.80549
eval/loss,0.1238
eval/mcc_metric,0.78692
eval/runtime,4.0481
eval/samples_per_second,556.81
eval/steps_per_second,17.539


[34m[1mwandb[0m: Agent Starting Run: imo6nm9x with config:
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	lora_alpha: 128
[34m[1mwandb[0m: 	lr: 0.001090370280478251
[34m[1mwandb[0m: 	optimizer: ['adamw']
[34m[1mwandb[0m: 	r: 64


Map: 100%|██████████| 10517/10517 [00:02<00:00, 3761.15 examples/s]
Map: 100%|██████████| 2254/2254 [00:00<00:00, 3907.92 examples/s]
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MTR and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 1,139,269 || all params: 4,642,682 || trainable%: 24.5390


  trainer_flavor = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss,Validation Loss,Mcc Metric,Accuracy,Auc-roc,Precision,Recall,F1-score
100,0.363,0.203302,0.698741,0.847826,0.951003,0.625263,0.593799,0.607826
200,0.2761,0.182639,0.713612,0.85315,0.961684,0.822182,0.812947,0.813223
300,0.2757,0.178204,0.746159,0.862467,0.961817,0.823076,0.72251,0.737629
400,0.2355,0.169735,0.715226,0.854925,0.965896,0.837466,0.748617,0.785853
500,0.2373,0.165078,0.744515,0.863798,0.963276,0.71258,0.783838,0.740727
600,0.2182,0.153336,0.762573,0.87622,0.972265,0.733287,0.845288,0.755418
700,0.1928,0.135687,0.760331,0.878438,0.976473,0.864175,0.77074,0.808796
800,0.2022,0.138744,0.761127,0.873114,0.975677,0.645151,0.661335,0.651822
900,0.2083,0.134805,0.775197,0.885093,0.973723,0.816869,0.848704,0.827289
1000,0.1891,0.125999,0.775966,0.885093,0.976379,0.86919,0.720487,0.760723


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Model saved to ./models_MTR100_Chemberta/imo6nm9x


0,1
eval/AUC-ROC,▁▄▄▅▄▆▇▇▇▇▇▇████
eval/Accuracy,▁▂▃▂▄▆▆▅▇▇▆▇▇▇██
eval/F1-score,▁▇▅▇▅▆▇▂█▆▇▇▇█▇█
eval/Precision,▁▇▇▇▄▄█▂▆█▆▆▆█▆█
eval/Recall,▁▇▅▅▆█▆▃█▄▇▇▇▇▇▇
eval/loss,█▆▆▅▅▄▃▃▃▂▂▂▁▁▁▁
eval/mcc_metric,▁▂▅▂▄▆▆▆▇▇▆▇▇███
eval/runtime,▁▃▆▃▃█▂▁▃▃█▃▄▁▁▂
eval/samples_per_second,█▆▃▆▆▁▇█▆▆▁▆▅██▇
eval/steps_per_second,█▆▃▆▆▁▇█▆▆▁▆▅██▇

0,1
eval/AUC-ROC,0.9792
eval/Accuracy,0.89086
eval/F1-score,0.83109
eval/Precision,0.86863
eval/Recall,0.8071
eval/loss,0.1146
eval/mcc_metric,0.79092
eval/runtime,3.9074
eval/samples_per_second,576.858
eval/steps_per_second,18.171


[34m[1mwandb[0m: Agent Starting Run: glzr8jim with config:
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	lora_alpha: 128
[34m[1mwandb[0m: 	lr: 0.0011492784634238222
[34m[1mwandb[0m: 	optimizer: ['adamw']
[34m[1mwandb[0m: 	r: 128


Map: 100%|██████████| 10517/10517 [00:02<00:00, 3804.38 examples/s]
Map: 100%|██████████| 2254/2254 [00:00<00:00, 3925.03 examples/s]
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MTR and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 2,128,773 || all params: 5,706,234 || trainable%: 37.3061


  trainer_flavor = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss,Validation Loss,Mcc Metric,Accuracy,Auc-roc,Precision,Recall,F1-score
100,0.3633,0.221187,0.670252,0.832742,0.936991,0.599444,0.572938,0.582786
200,0.281,0.189608,0.735002,0.857587,0.958517,0.813728,0.776404,0.786809
300,0.2788,0.180736,0.753124,0.868678,0.961806,0.739694,0.787603,0.760614
400,0.2377,0.197088,0.62404,0.817657,0.963485,0.838941,0.66037,0.726857
500,0.2448,0.17517,0.73917,0.859805,0.961712,0.823054,0.781396,0.793482
600,0.2168,0.154075,0.767593,0.877551,0.971343,0.721025,0.851831,0.742313
700,0.2003,0.129148,0.764519,0.879769,0.97546,0.858464,0.839514,0.847164
800,0.2042,0.134454,0.766344,0.876664,0.974284,0.723579,0.728323,0.723969
900,0.2075,0.135654,0.781343,0.887755,0.972494,0.770035,0.789416,0.775852
1000,0.1964,0.123058,0.770132,0.881988,0.976953,0.865389,0.784615,0.817121


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Model saved to ./models_MTR100_Chemberta/glzr8jim


0,1
eval/AUC-ROC,▁▅▅▅▅▇▇▇▇█▇█████
eval/Accuracy,▂▅▆▁▅▇▇▇█▇▇█▇███
eval/F1-score,▁▆▆▅▇▅█▅▆▇▇█▆█▇█
eval/Precision,▁▇▅▇▇▄█▄▅█▆▇▅▇▅▇
eval/Recall,▁▆▆▃▆▇▇▅▆▆▆█████
eval/loss,█▆▅▆▅▄▂▂▂▂▂▂▁▁▁▁
eval/mcc_metric,▃▆▆▁▆▇▇▇█▇██████
eval/runtime,▅▂▁▅▆▃█▂▂▂▄▂▂▂▃▇
eval/samples_per_second,▄▇█▄▃▆▁▇▇▇▅▇▇▇▆▂
eval/steps_per_second,▄▇█▄▃▆▁▇▇▇▅▇▇▇▆▂

0,1
eval/AUC-ROC,0.97891
eval/Accuracy,0.88864
eval/F1-score,0.83873
eval/Precision,0.82054
eval/Recall,0.86531
eval/loss,0.11412
eval/mcc_metric,0.78543
eval/runtime,4.1871
eval/samples_per_second,538.325
eval/steps_per_second,16.957


[34m[1mwandb[0m: Agent Starting Run: y6ioij00 with config:
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	lora_alpha: 128
[34m[1mwandb[0m: 	lr: 0.0009721589973334608
[34m[1mwandb[0m: 	optimizer: ['adamw']
[34m[1mwandb[0m: 	r: 128


Map: 100%|██████████| 10517/10517 [00:02<00:00, 3794.35 examples/s]
Map: 100%|██████████| 2254/2254 [00:00<00:00, 3853.10 examples/s]
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MTR and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 2,128,773 || all params: 5,706,234 || trainable%: 37.3061


  trainer_flavor = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss,Validation Loss,Mcc Metric,Accuracy,Auc-roc,Precision,Recall,F1-score
100,0.3636,0.20939,0.70446,0.846051,0.951406,0.621534,0.611995,0.609518
200,0.276,0.173565,0.720749,0.85803,0.96305,0.832882,0.811748,0.81807
300,0.2737,0.183659,0.737483,0.855812,0.958301,0.813254,0.78923,0.792354
400,0.2287,0.199301,0.67369,0.837622,0.945686,0.842906,0.707133,0.760877
500,0.2375,0.169542,0.744847,0.862023,0.956564,0.727155,0.784071,0.75144
600,0.2119,0.157318,0.760887,0.875333,0.968162,0.736452,0.844286,0.755085
700,0.1956,0.138943,0.760961,0.878882,0.973667,0.816203,0.835803,0.820232
800,0.1978,0.139936,0.768099,0.877107,0.972278,0.853698,0.79706,0.816335
900,0.2032,0.140686,0.761975,0.878882,0.970545,0.81323,0.838388,0.818749
1000,0.1898,0.128867,0.776309,0.88598,0.974617,0.87125,0.782432,0.818917


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Model saved to ./models_MTR100_Chemberta/y6ioij00


0,1
eval/AUC-ROC,▂▅▄▁▃▆▇▇▇▇▇█████
eval/Accuracy,▂▄▃▁▄▆▆▆▆▇▆█▆▇██
eval/F1-score,▁▇▆▅▅▅▇▆▇▇█▇▆█▇▇
eval/Precision,▁▇▆▇▄▄▆█▆█▇▇▅█▇▇
eval/Recall,▁▆▆▃▅▇▇▆▇▅██████
eval/loss,█▅▆▇▅▄▃▃▃▂▂▂▁▁▁▁
eval/mcc_metric,▃▄▅▁▅▆▆▆▆▇▇█▇███
eval/runtime,▁▂▁▁▁▃▃▂▁█▁▃▂▁▁▁
eval/samples_per_second,█▇███▆▆▇█▁█▆▇▇██
eval/steps_per_second,█▇███▆▆▇█▁█▆▇▇██

0,1
eval/AUC-ROC,0.97724
eval/Accuracy,0.89308
eval/F1-score,0.84478
eval/Precision,0.82408
eval/Recall,0.87314
eval/loss,0.11902
eval/mcc_metric,0.79426
eval/runtime,4.0543
eval/samples_per_second,555.955
eval/steps_per_second,17.512


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: to3uvb1h with config:
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	lora_alpha: 128
[34m[1mwandb[0m: 	lr: 0.001092980471805136
[34m[1mwandb[0m: 	optimizer: ['adamw']
[34m[1mwandb[0m: 	r: 128


Map: 100%|██████████| 10517/10517 [00:02<00:00, 3650.60 examples/s]
Map: 100%|██████████| 2254/2254 [00:00<00:00, 3855.54 examples/s]
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MTR and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 2,128,773 || all params: 5,706,234 || trainable%: 37.3061


  trainer_flavor = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss,Validation Loss,Mcc Metric,Accuracy,Auc-roc,Precision,Recall,F1-score
100,0.3612,0.218163,0.687245,0.839397,0.942113,0.613976,0.595243,0.601145
200,0.2777,0.183562,0.745702,0.86646,0.959946,0.830733,0.774714,0.793639
300,0.2724,0.177634,0.746863,0.864241,0.96125,0.826519,0.719926,0.738828
400,0.239,0.196164,0.670627,0.834073,0.958913,0.768313,0.715214,0.735569
500,0.2392,0.17621,0.747892,0.865572,0.956969,0.733378,0.783156,0.754735
600,0.2168,0.158084,0.759298,0.874445,0.967742,0.744523,0.839834,0.760698
700,0.2,0.135716,0.771171,0.882431,0.975543,0.863401,0.7834,0.815475
800,0.199,0.13938,0.764518,0.874889,0.974871,0.850055,0.728172,0.753449
900,0.2058,0.132802,0.775513,0.884206,0.97439,0.782033,0.856066,0.807673
1000,0.1932,0.123457,0.771549,0.883319,0.976518,0.870169,0.782056,0.817798


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Model saved to ./models_MTR100_Chemberta/to3uvb1h


0,1
eval/AUC-ROC,▁▅▅▄▄▆█▇▇█▇▇████
eval/Accuracy,▂▅▅▁▅▆▇▆▇▇▆▇▇▇██
eval/F1-score,▁▇▅▅▆▆▇▆▇▇█▇█▇██
eval/Precision,▁▇▇▅▄▅█▇▆█▆▆▆▆▆▆
eval/Recall,▁▅▄▄▆▇▆▄▇▆█▆█▆██
eval/loss,█▆▅▆▅▄▂▂▂▁▂▂▁▁▁▁
eval/mcc_metric,▂▅▅▁▅▆▇▆▇▇▇▇▇▇██
eval/runtime,▂▃▂█▁▂▁▂▂▂▂▁▁▁▁▁
eval/samples_per_second,▇▆▇▁█▇█▇▇▇▇█████
eval/steps_per_second,▇▆▇▁█▇█▇▇▇▇█████

0,1
eval/AUC-ROC,0.97768
eval/Accuracy,0.89352
eval/F1-score,0.8238
eval/Precision,0.79527
eval/Recall,0.87347
eval/loss,0.11861
eval/mcc_metric,0.79544
eval/runtime,4.0552
eval/samples_per_second,555.832
eval/steps_per_second,17.508


{'_runtime': 295.493498149, '_step': 32, '_timestamp': 1742899458.7640474, '_wandb': {'runtime': 295}, 'eval/AUC-ROC': 0.9776794999278964, 'eval/Accuracy': 0.8935226264418811, 'eval/F1-score': 0.8237965481576575, 'eval/Precision': 0.7952690077541764, 'eval/Recall': 0.8734746496338153, 'eval/loss': 0.11860866844654085, 'eval/mcc_metric': 0.7954413436068798, 'eval/runtime': 4.0552, 'eval/samples_per_second': 555.832, 'eval/steps_per_second': 17.508, 'total_flos': 852563953612800, 'train/epoch': 5, 'train/global_step': 1645, 'train/grad_norm': 0.9067480564117432, 'train/learning_rate': 2.9899161842693692e-05, 'train/loss': 0.1448, 'train_loss': 0.20984870858467825, 'train_runtime': 287.0036, 'train_samples_per_second': 183.221, 'train_steps_per_second': 5.732}


### Evaluation

In [42]:
import torch
import numpy as np
import random

# Set random seeds for reproducibility
seed = 42
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
np.random.seed(seed)
random.seed(seed)

# Ensure deterministic behavior in PyTorch computations
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False


In [30]:
base_model = AutoModelForSequenceClassification.from_pretrained(
    "DeepChem/ChemBERTa-10M-MTR",
    num_labels=5,
    problem_type="single_label_classification",    
    trust_remote_code=True,
    
)

tokenizer = AutoTokenizer.from_pretrained("DeepChem/ChemBERTa-10M-MTR",trust_remote_code=True)


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-10M-MTR and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [53]:
from peft import PeftModel  

adapter_model = PeftModel.from_pretrained(base_model, "/home/raghvendra2/Molformer_Finetuning/results_flavor_chemberta_wandb_reepoch_10mtr/checkpoint-1000")



In [19]:
#compute metrics
metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):

    logits, labels = eval_pred

    predictions = np.argmax(logits, axis=-1)
    probabilities= softmax(logits, axis=1)
    mcc = matthews_corrcoef(labels, predictions)

        
    return {
            "eval_mcc_metric": mcc,
            "Accuracy": metric.compute(predictions=predictions, references=labels)["accuracy"],
            "AUC-ROC": roc_auc_score(labels, probabilities,multi_class="ovr"),  # AUC-ROC requires probabilities
            "Precision": precision_score(labels, predictions,average="macro"),
            "Recall": recall_score(labels, predictions,average="macro"),
            "F1-score": f1_score(labels, predictions,average="macro")
        }


In [28]:
training_args = TrainingArguments(
    output_dir="./test_results_flavor",
    per_device_eval_batch_size=32,
    report_to="none",  # Disable logging to W&B for test
    

)

tokenizer = AutoTokenizer.from_pretrained("DeepChem/ChemBERTa-77M-MTR",trust_remote_code=True)

In [54]:
trainer= CustomTrainer(
    model=adapter_model,
    args=training_args,
    eval_dataset=test_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
    )

  trainer= CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


### Checking for all 77 MTR Moels

In [17]:
from peft import PeftModel

In [None]:

import os

models_dir = "./models_MTR100_Chemberta"

for ckpt in os.listdir(models_dir):
    model_path = os.path.join(models_dir, ckpt)
    
    if os.path.isdir(model_path):
        adapter_config_path = os.path.join(model_path, "adapter_config.json")
        
        if not os.path.exists(adapter_config_path):
            print(f"⚠️ Missing 'adapter_config.json' in {model_path}")

# Evaluate each saved model

valid_checkpoints = [
    os.path.join(models_dir, ckpt)
    for ckpt in os.listdir(models_dir)
    if os.path.isdir(os.path.join(models_dir, ckpt)) and 
       os.path.exists(os.path.join(models_dir, ckpt, "adapter_config.json"))
]

print("Valid Model Checkpoints:", valid_checkpoints)

for model_path in valid_checkpoints:
    print(f"Evaluating model: {model_path}")

    adapter_model = PeftModel.from_pretrained(base_model, model_path)
    adapter_model.eval()

    trainer = CustomTrainer(
        model=adapter_model,
        args=training_args,
        eval_dataset=test_dataset,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics
    )

    test_results_clin = trainer.evaluate()
    print(f"Test Results for {model_path}: {test_results_clin}")



⚠️ Missing 'adapter_config.json' in ./models_MTR100_Chemberta/ujye8pur
⚠️ Missing 'adapter_config.json' in ./models_MTR100_Chemberta/5k0m1ybg
⚠️ Missing 'adapter_config.json' in ./models_MTR100_Chemberta/vx2zkfhq
⚠️ Missing 'adapter_config.json' in ./models_MTR100_Chemberta/tl30zdlb
⚠️ Missing 'adapter_config.json' in ./models_MTR100_Chemberta/t6motadp
Valid Model Checkpoints: ['./models_MTR100_Chemberta/glzr8jim', './models_MTR100_Chemberta/y6ioij00', './models_MTR100_Chemberta/imo6nm9x', './models_MTR100_Chemberta/to3uvb1h', './models_MTR100_Chemberta/oggmqthu']
Evaluating model: ./models_MTR100_Chemberta/glzr8jim


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_MTR100_Chemberta/glzr8jim: {'eval_mcc_metric': 0.7857732041076384, 'eval_loss': 0.13933634757995605, 'eval_model_preparation_time': 0.0047, 'eval_Accuracy': 0.8864241348713399, 'eval_AUC-ROC': 0.9560709660479298, 'eval_Precision': 0.80451841570831, 'eval_Recall': 0.7921601853640271, 'eval_F1-score': 0.7971128505954014, 'eval_runtime': 5.277, 'eval_samples_per_second': 427.137, 'eval_steps_per_second': 13.455}
Evaluating model: ./models_MTR100_Chemberta/y6ioij00


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_MTR100_Chemberta/y6ioij00: {'eval_mcc_metric': 0.7777237717003995, 'eval_loss': 0.13908489048480988, 'eval_model_preparation_time': 0.0045, 'eval_Accuracy': 0.8824312333629104, 'eval_AUC-ROC': 0.9680355972198766, 'eval_Precision': 0.8615301215900842, 'eval_Recall': 0.755471099116446, 'eval_F1-score': 0.7915827963618229, 'eval_runtime': 4.6413, 'eval_samples_per_second': 485.64, 'eval_steps_per_second': 15.297}
Evaluating model: ./models_MTR100_Chemberta/imo6nm9x


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_MTR100_Chemberta/imo6nm9x: {'eval_mcc_metric': 0.7818169758422309, 'eval_loss': 0.1437530368566513, 'eval_model_preparation_time': 0.0037, 'eval_Accuracy': 0.8850931677018633, 'eval_AUC-ROC': 0.9721605705286832, 'eval_Precision': 0.8250201990356656, 'eval_Recall': 0.7875592578624443, 'eval_F1-score': 0.8045247463831717, 'eval_runtime': 3.9065, 'eval_samples_per_second': 576.986, 'eval_steps_per_second': 18.175}
Evaluating model: ./models_MTR100_Chemberta/to3uvb1h


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_MTR100_Chemberta/to3uvb1h: {'eval_mcc_metric': 0.777782885687969, 'eval_loss': 0.1434350460767746, 'eval_model_preparation_time': 0.0038, 'eval_Accuracy': 0.883318544809228, 'eval_AUC-ROC': 0.9714154098726826, 'eval_Precision': 0.8247383778991484, 'eval_Recall': 0.7833793596443682, 'eval_F1-score': 0.8022087198646425, 'eval_runtime': 4.0312, 'eval_samples_per_second': 559.135, 'eval_steps_per_second': 17.613}
Evaluating model: ./models_MTR100_Chemberta/oggmqthu


Test Results for ./models_MTR100_Chemberta/oggmqthu: {'eval_mcc_metric': 0.7667172633940723, 'eval_loss': 0.14829595386981964, 'eval_model_preparation_time': 0.0038, 'eval_Accuracy': 0.8757763975155279, 'eval_AUC-ROC': 0.9531141891298681, 'eval_Precision': 0.7546311912638144, 'eval_Recall': 0.746212206833805, 'eval_F1-score': 0.7496316603246458, 'eval_runtime': 4.0222, 'eval_samples_per_second': 560.384, 'eval_steps_per_second': 17.652}


: 

### Checking for check points of glzr8jim model

In [52]:
from peft import PeftModel  

adapter_model = PeftModel.from_pretrained(base_model, "/home/raghvendra2/Molformer_Finetuning/models_MTR100_Chemberta/to3uvb1h/checkpoint-1645")



In [33]:
test_results_flavor = trainer.evaluate()

print("Test Results for checkpoint 1645:", test_results_flavor)

Test Results for checkpoint 1645: {'eval_mcc_metric': 0.7857732041076384, 'eval_loss': 0.13933634757995605, 'eval_Accuracy': 0.8864241348713399, 'eval_AUC-ROC': 0.9560709660479298, 'eval_Precision': 0.80451841570831, 'eval_Recall': 0.7921601853640271, 'eval_F1-score': 0.7971128505954014, 'eval_runtime': 3.3347, 'eval_samples_per_second': 675.93, 'eval_steps_per_second': 5.398}


In [54]:
test_results_flavor = trainer.evaluate()

print("Test Results for checkpoint 1645:", test_results_flavor)

Test Results for checkpoint 1500: {'eval_mcc_metric': 0.7893589818962506, 'eval_loss': 0.14277541637420654, 'eval_Accuracy': 0.888642413487134, 'eval_AUC-ROC': 0.970534896759912, 'eval_Precision': 0.8309968335234739, 'eval_Recall': 0.7920133444927788, 'eval_F1-score': 0.8095269496517847, 'eval_runtime': 7.7548, 'eval_samples_per_second': 290.659, 'eval_steps_per_second': 2.321}


### Result for imo6nm9x checkpoint 1645  (Best Model until now)

In [None]:
test_results_flavor = trainer.evaluate()

print("Test Results for checkpoint 1645:", test_results_flavor) 

Test Results for checkpoint 1000: {'eval_mcc_metric': 0.7882497847517572, 'eval_loss': 0.14135079085826874, 'eval_Accuracy': 0.8873114463176575, 'eval_AUC-ROC': 0.9720325211171271, 'eval_Precision': 0.8327510094500526, 'eval_Recall': 0.8274682544764185, 'eval_F1-score': 0.8297152462405265, 'eval_runtime': 3.0135, 'eval_samples_per_second': 747.963, 'eval_steps_per_second': 5.973}


### The best model is : checkpoint 1500 of result flavor chemberta wandb for the original flavor paper model

### 10 MTR model

In [32]:
import os
from peft import PeftModel  

models_dir = "./models_MTR10_Chemberta"

for ckpt in os.listdir(models_dir):
    model_path = os.path.join(models_dir, ckpt)
    
    if os.path.isdir(model_path):
        adapter_config_path = os.path.join(model_path, "adapter_config.json")
        
        if not os.path.exists(adapter_config_path):
            print(f"⚠️ Missing 'adapter_config.json' in {model_path}")

# Evaluate each saved model

valid_checkpoints = [
    os.path.join(models_dir, ckpt)
    for ckpt in os.listdir(models_dir)
    if os.path.isdir(os.path.join(models_dir, ckpt)) and 
       os.path.exists(os.path.join(models_dir, ckpt, "adapter_config.json"))
]

print("Valid Model Checkpoints:", valid_checkpoints)

for model_path in valid_checkpoints:
    print(f"Evaluating model: {model_path}")

    adapter_model = PeftModel.from_pretrained(base_model, model_path)
    adapter_model.eval()

    trainer = CustomTrainer(
        model=adapter_model,
        args=training_args,
        eval_dataset=test_dataset,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics
    )

    test_results_clin = trainer.evaluate()
    print(f"Test Results for {model_path}: {test_results_clin}")


Valid Model Checkpoints: ['./models_MTR10_Chemberta/g4esefrp', './models_MTR10_Chemberta/ejr2572h', './models_MTR10_Chemberta/ljfmymgw', './models_MTR10_Chemberta/6wz8dk9m', './models_MTR10_Chemberta/ng77zdds', './models_MTR10_Chemberta/01aq0qc9', './models_MTR10_Chemberta/qfe8vcqo', './models_MTR10_Chemberta/bc9aajo1', './models_MTR10_Chemberta/bxb0kwqm', './models_MTR10_Chemberta/wmaugze4', './models_MTR10_Chemberta/sdj2wujw', './models_MTR10_Chemberta/l3pooke3', './models_MTR10_Chemberta/6fbtk138', './models_MTR10_Chemberta/7zpn8eng', './models_MTR10_Chemberta/g9h0hhdh', './models_MTR10_Chemberta/9m90icx1', './models_MTR10_Chemberta/fh69w10f', './models_MTR10_Chemberta/izd262gb', './models_MTR10_Chemberta/1y764hkg', './models_MTR10_Chemberta/ot8tkhls']
Evaluating model: ./models_MTR10_Chemberta/g4esefrp


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_MTR10_Chemberta/g4esefrp: {'eval_mcc_metric': -0.051316471359806895, 'eval_loss': 1.0326316356658936, 'eval_model_preparation_time': 0.0035, 'eval_Accuracy': 0.14640638864241348, 'eval_AUC-ROC': 0.43906379255989547, 'eval_Precision': 0.2489830873085715, 'eval_Recall': 0.18508591537359448, 'eval_F1-score': 0.14766558492259813, 'eval_runtime': 6.5404, 'eval_samples_per_second': 344.629, 'eval_steps_per_second': 10.856}
Evaluating model: ./models_MTR10_Chemberta/ejr2572h


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_MTR10_Chemberta/ejr2572h: {'eval_mcc_metric': 0.7866560449462396, 'eval_loss': 0.1447119116783142, 'eval_model_preparation_time': 0.0038, 'eval_Accuracy': 0.885980479148181, 'eval_AUC-ROC': 0.9715066356563075, 'eval_Precision': 0.8646903595235462, 'eval_Recall': 0.6974415806217465, 'eval_F1-score': 0.719808474928872, 'eval_runtime': 6.3306, 'eval_samples_per_second': 356.051, 'eval_steps_per_second': 11.215}
Evaluating model: ./models_MTR10_Chemberta/ljfmymgw


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_MTR10_Chemberta/ljfmymgw: {'eval_mcc_metric': 0.7821532659249115, 'eval_loss': 0.14776849746704102, 'eval_model_preparation_time': 0.0038, 'eval_Accuracy': 0.8837622005323869, 'eval_AUC-ROC': 0.9723125903238156, 'eval_Precision': 0.8613768663665144, 'eval_Recall': 0.6965203566199836, 'eval_F1-score': 0.7186124420025131, 'eval_runtime': 6.3015, 'eval_samples_per_second': 357.695, 'eval_steps_per_second': 11.267}
Evaluating model: ./models_MTR10_Chemberta/6wz8dk9m


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_MTR10_Chemberta/6wz8dk9m: {'eval_mcc_metric': 0.7668119811955401, 'eval_loss': 0.14871440827846527, 'eval_model_preparation_time': 0.0038, 'eval_Accuracy': 0.8775510204081632, 'eval_AUC-ROC': 0.9690304146799606, 'eval_Precision': 0.8111675250675251, 'eval_Recall': 0.7393671017209662, 'eval_F1-score': 0.767372027794409, 'eval_runtime': 6.3406, 'eval_samples_per_second': 355.486, 'eval_steps_per_second': 11.198}
Evaluating model: ./models_MTR10_Chemberta/ng77zdds


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_MTR10_Chemberta/ng77zdds: {'eval_mcc_metric': 0.7903027900598758, 'eval_loss': 0.13793590664863586, 'eval_model_preparation_time': 0.0038, 'eval_Accuracy': 0.888642413487134, 'eval_AUC-ROC': 0.9712670821306159, 'eval_Precision': 0.8297460295827033, 'eval_Recall': 0.7985800549192325, 'eval_F1-score': 0.8120801935825286, 'eval_runtime': 6.3815, 'eval_samples_per_second': 353.209, 'eval_steps_per_second': 11.126}
Evaluating model: ./models_MTR10_Chemberta/01aq0qc9


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_MTR10_Chemberta/01aq0qc9: {'eval_mcc_metric': 0.7430506848294517, 'eval_loss': 0.18837043642997742, 'eval_model_preparation_time': 0.0045, 'eval_Accuracy': 0.8637976929902396, 'eval_AUC-ROC': 0.9323429148394224, 'eval_Precision': 0.8413073954652214, 'eval_Recall': 0.6646134687107592, 'eval_F1-score': 0.6913864900401119, 'eval_runtime': 5.5536, 'eval_samples_per_second': 405.864, 'eval_steps_per_second': 12.785}
Evaluating model: ./models_MTR10_Chemberta/qfe8vcqo


Test Results for ./models_MTR10_Chemberta/qfe8vcqo: {'eval_mcc_metric': 0.7958600550347535, 'eval_loss': 0.13877424597740173, 'eval_model_preparation_time': 0.0037, 'eval_Accuracy': 0.8917480035492458, 'eval_AUC-ROC': 0.9663726912480035, 'eval_Precision': 0.880921824044, 'eval_Recall': 0.7662753528081125, 'eval_F1-score': 0.805181953288814, 'eval_runtime': 6.2391, 'eval_samples_per_second': 361.272, 'eval_steps_per_second': 11.38}
Evaluating model: ./models_MTR10_Chemberta/bc9aajo1


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_MTR10_Chemberta/bc9aajo1: {'eval_mcc_metric': 0.7643431820635597, 'eval_loss': 0.1676427721977234, 'eval_model_preparation_time': 0.0038, 'eval_Accuracy': 0.8735581188997338, 'eval_AUC-ROC': 0.9562373760530886, 'eval_Precision': 0.7645788136181467, 'eval_Recall': 0.7538178204002817, 'eval_F1-score': 0.7581014272158283, 'eval_runtime': 5.7196, 'eval_samples_per_second': 394.085, 'eval_steps_per_second': 12.413}
Evaluating model: ./models_MTR10_Chemberta/bxb0kwqm


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_MTR10_Chemberta/bxb0kwqm: {'eval_mcc_metric': 0.7578172672777702, 'eval_loss': 0.15704374015331268, 'eval_model_preparation_time': 0.0039, 'eval_Accuracy': 0.8717834960070985, 'eval_AUC-ROC': 0.9631741922590947, 'eval_Precision': 0.8466492193563179, 'eval_Recall': 0.6729556781098237, 'eval_F1-score': 0.6985194228220318, 'eval_runtime': 5.8746, 'eval_samples_per_second': 383.686, 'eval_steps_per_second': 12.086}
Evaluating model: ./models_MTR10_Chemberta/wmaugze4


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_MTR10_Chemberta/wmaugze4: {'eval_mcc_metric': 0.7458759952897328, 'eval_loss': 0.17986901104450226, 'eval_model_preparation_time': 0.0039, 'eval_Accuracy': 0.8646850044365573, 'eval_AUC-ROC': 0.9327447034435042, 'eval_Precision': 0.7320474956194788, 'eval_Recall': 0.6674241202703084, 'eval_F1-score': 0.6829243355826524, 'eval_runtime': 5.5514, 'eval_samples_per_second': 406.026, 'eval_steps_per_second': 12.79}
Evaluating model: ./models_MTR10_Chemberta/sdj2wujw


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_MTR10_Chemberta/sdj2wujw: {'eval_mcc_metric': 0.7333632887355505, 'eval_loss': 0.17621594667434692, 'eval_model_preparation_time': 0.0038, 'eval_Accuracy': 0.860248447204969, 'eval_AUC-ROC': 0.9496709618840541, 'eval_Precision': 0.8420489107313628, 'eval_Recall': 0.6503143267384901, 'eval_F1-score': 0.6825494840074434, 'eval_runtime': 5.8442, 'eval_samples_per_second': 385.683, 'eval_steps_per_second': 12.149}
Evaluating model: ./models_MTR10_Chemberta/l3pooke3


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_MTR10_Chemberta/l3pooke3: {'eval_mcc_metric': 0.7930913320253843, 'eval_loss': 0.14359517395496368, 'eval_model_preparation_time': 0.0039, 'eval_Accuracy': 0.8895297249334516, 'eval_AUC-ROC': 0.9621190217440319, 'eval_Precision': 0.7700547891183185, 'eval_Recall': 0.7665916641746098, 'eval_F1-score': 0.7678136810786037, 'eval_runtime': 6.4431, 'eval_samples_per_second': 349.829, 'eval_steps_per_second': 11.019}
Evaluating model: ./models_MTR10_Chemberta/6fbtk138


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_MTR10_Chemberta/6fbtk138: {'eval_mcc_metric': 0.7933993539706464, 'eval_loss': 0.15008734166622162, 'eval_model_preparation_time': 0.0038, 'eval_Accuracy': 0.8873114463176575, 'eval_AUC-ROC': 0.9704246430750263, 'eval_Precision': 0.7670510669552366, 'eval_Recall': 0.7352891009623562, 'eval_F1-score': 0.7427875600405793, 'eval_runtime': 5.8871, 'eval_samples_per_second': 382.87, 'eval_steps_per_second': 12.06}
Evaluating model: ./models_MTR10_Chemberta/7zpn8eng


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_MTR10_Chemberta/7zpn8eng: {'eval_mcc_metric': 0.7651667377248028, 'eval_loss': 0.14991040527820587, 'eval_model_preparation_time': 0.0039, 'eval_Accuracy': 0.8757763975155279, 'eval_AUC-ROC': 0.970537196967868, 'eval_Precision': 0.7902004049412119, 'eval_Recall': 0.7104300055721067, 'eval_F1-score': 0.7363837623750085, 'eval_runtime': 6.1911, 'eval_samples_per_second': 364.071, 'eval_steps_per_second': 11.468}
Evaluating model: ./models_MTR10_Chemberta/g9h0hhdh


Test Results for ./models_MTR10_Chemberta/g9h0hhdh: {'eval_mcc_metric': 0.7802386002887226, 'eval_loss': 0.1445203423500061, 'eval_model_preparation_time': 0.0038, 'eval_Accuracy': 0.883318544809228, 'eval_AUC-ROC': 0.9715442514202284, 'eval_Precision': 0.8220906747114665, 'eval_Recall': 0.7903911904074594, 'eval_F1-score': 0.8042745847485124, 'eval_runtime': 5.9114, 'eval_samples_per_second': 381.3, 'eval_steps_per_second': 12.011}
Evaluating model: ./models_MTR10_Chemberta/9m90icx1


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_MTR10_Chemberta/9m90icx1: {'eval_mcc_metric': 0.7514094282605679, 'eval_loss': 0.16853775084018707, 'eval_model_preparation_time': 0.0039, 'eval_Accuracy': 0.867790594498669, 'eval_AUC-ROC': 0.9560897739864972, 'eval_Precision': 0.6891623854302724, 'eval_Recall': 0.6721829327142185, 'eval_F1-score': 0.6789185236137615, 'eval_runtime': 5.869, 'eval_samples_per_second': 384.05, 'eval_steps_per_second': 12.097}
Evaluating model: ./models_MTR10_Chemberta/fh69w10f


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_MTR10_Chemberta/fh69w10f: {'eval_mcc_metric': 0.7889399220829554, 'eval_loss': 0.143296480178833, 'eval_model_preparation_time': 0.0038, 'eval_Accuracy': 0.8877551020408163, 'eval_AUC-ROC': 0.9722684012847921, 'eval_Precision': 0.8653844054536565, 'eval_Recall': 0.7304610308885935, 'eval_F1-score': 0.7636675609156894, 'eval_runtime': 5.9807, 'eval_samples_per_second': 376.882, 'eval_steps_per_second': 11.872}
Evaluating model: ./models_MTR10_Chemberta/izd262gb


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_MTR10_Chemberta/izd262gb: {'eval_mcc_metric': 0.7535055121586963, 'eval_loss': 0.178395614027977, 'eval_model_preparation_time': 0.0038, 'eval_Accuracy': 0.8682342502218279, 'eval_AUC-ROC': 0.9349904085573113, 'eval_Precision': 0.7087291941627231, 'eval_Recall': 0.675850986501599, 'eval_F1-score': 0.6858847985640688, 'eval_runtime': 5.7972, 'eval_samples_per_second': 388.808, 'eval_steps_per_second': 12.247}
Evaluating model: ./models_MTR10_Chemberta/1y764hkg


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_MTR10_Chemberta/1y764hkg: {'eval_mcc_metric': 0.7830645275961695, 'eval_loss': 0.1425733119249344, 'eval_model_preparation_time': 0.0038, 'eval_Accuracy': 0.8846495119787046, 'eval_AUC-ROC': 0.9724818401104148, 'eval_Precision': 0.8660406281416468, 'eval_Recall': 0.7554246405058954, 'eval_F1-score': 0.7920559684082156, 'eval_runtime': 6.1651, 'eval_samples_per_second': 365.604, 'eval_steps_per_second': 11.516}
Evaluating model: ./models_MTR10_Chemberta/ot8tkhls


Test Results for ./models_MTR10_Chemberta/ot8tkhls: {'eval_mcc_metric': 0.7672753355369059, 'eval_loss': 0.16462251543998718, 'eval_model_preparation_time': 0.0039, 'eval_Accuracy': 0.8762200532386868, 'eval_AUC-ROC': 0.9429835568930061, 'eval_Precision': 0.8636765181482524, 'eval_Recall': 0.6763263006647735, 'eval_F1-score': 0.7053200238365681, 'eval_runtime': 6.2716, 'eval_samples_per_second': 359.399, 'eval_steps_per_second': 11.321}


### Checkpoints of 10MTR Model

In [48]:
from peft import PeftModel  

adapter_model = PeftModel.from_pretrained(base_model, "/home/raghvendra2/Molformer_Finetuning/models_MTR10_Chemberta/qfe8vcqo/checkpoint-1500")



In [49]:
trainer = CustomTrainer(
        model=adapter_model,
        args=training_args,
        eval_dataset=test_dataset,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics
    )

  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


### Best model of MTR 10M model ng77zdds 1645

In [35]:
test_results_flavor = trainer.evaluate()

print("Test Results for checkpoint 1645 of ng77zdds:", test_results_flavor)

Test Results for checkpoint 1645 of ng77zdds: {'eval_mcc_metric': 0.8020882141160608, 'eval_loss': 0.1354757696390152, 'eval_model_preparation_time': 0.0055, 'eval_Accuracy': 0.8948535936113576, 'eval_AUC-ROC': 0.9712263554530509, 'eval_Precision': 0.8163329237327627, 'eval_Recall': 0.8065458720498409, 'eval_F1-score': 0.8099227080402003, 'eval_runtime': 6.2435, 'eval_samples_per_second': 361.016, 'eval_steps_per_second': 11.372}


### 2nd best

In [38]:
test_results_flavor = trainer.evaluate()

print("Test Results for checkpoint 1500 of ng77zdds:", test_results_flavor)

Test Results for checkpoint 1500 of ng77zdds: {'eval_mcc_metric': 0.7903027900598758, 'eval_loss': 0.13793590664863586, 'eval_model_preparation_time': 0.0043, 'eval_Accuracy': 0.888642413487134, 'eval_AUC-ROC': 0.9712670821306159, 'eval_Precision': 0.8297460295827033, 'eval_Recall': 0.7985800549192325, 'eval_F1-score': 0.8120801935825286, 'eval_runtime': 6.3741, 'eval_samples_per_second': 353.618, 'eval_steps_per_second': 11.139}


In [None]:
test_results_flavor = trainer.evaluate()

print("Test Results for checkpoint 1500 of qfe8vcqo:", test_results_flavor)

Test Results for checkpoint 1500 of qfe8vcqo: {'eval_mcc_metric': 0.7895870461452488, 'eval_loss': 0.1416712999343872, 'eval_model_preparation_time': 0.0042, 'eval_Accuracy': 0.888642413487134, 'eval_AUC-ROC': 0.9652783429504371, 'eval_Precision': 0.8719392292967356, 'eval_Recall': 0.7636337052785079, 'eval_F1-score': 0.7999856428943348, 'eval_runtime': 6.3872, 'eval_samples_per_second': 352.894, 'eval_steps_per_second': 11.116}


: 

## 100 MLM model

In [44]:

import os

models_dir = "./models_Mlm_10_Chemberta"

for ckpt in os.listdir(models_dir):
    model_path = os.path.join(models_dir, ckpt)
    
    if os.path.isdir(model_path):
        adapter_config_path = os.path.join(model_path, "adapter_config.json")
        
        if not os.path.exists(adapter_config_path):
            print(f"⚠️ Missing 'adapter_config.json' in {model_path}")

# Evaluate each saved model

valid_checkpoints = [
    os.path.join(models_dir, ckpt)
    for ckpt in os.listdir(models_dir)
    if os.path.isdir(os.path.join(models_dir, ckpt)) and 
       os.path.exists(os.path.join(models_dir, ckpt, "adapter_config.json"))
]

print("Valid Model Checkpoints:", valid_checkpoints)

for model_path in valid_checkpoints:
    print(f"Evaluating model: {model_path}")

    adapter_model = PeftModel.from_pretrained(base_model, model_path)
    adapter_model.eval()

    trainer = CustomTrainer(
        model=adapter_model,
        args=training_args,
        eval_dataset=test_dataset,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics
    )

    test_results_clin = trainer.evaluate()
    print(f"Test Results for {model_path}: {test_results_clin}")

  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Valid Model Checkpoints: ['./models_Mlm_10_Chemberta/fv1ilzi8', './models_Mlm_10_Chemberta/4fb63oti', './models_Mlm_10_Chemberta/gfgudjue', './models_Mlm_10_Chemberta/e374cax6', './models_Mlm_10_Chemberta/8jzpzyyg', './models_Mlm_10_Chemberta/ao6l1ar5', './models_Mlm_10_Chemberta/29mn1i36', './models_Mlm_10_Chemberta/v1e93rjp', './models_Mlm_10_Chemberta/4lskz3k4', './models_Mlm_10_Chemberta/6ocfxa7u']
Evaluating model: ./models_Mlm_10_Chemberta/fv1ilzi8


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mlm_10_Chemberta/fv1ilzi8: {'eval_mcc_metric': 0.0704170504547604, 'eval_loss': 1.0308160781860352, 'eval_model_preparation_time': 0.0033, 'eval_Accuracy': 0.2067435669920142, 'eval_AUC-ROC': 0.5201085713307327, 'eval_Precision': 0.2973564404255674, 'eval_Recall': 0.22460865555170856, 'eval_F1-score': 0.15843567515925416, 'eval_runtime': 3.6606, 'eval_samples_per_second': 615.744, 'eval_steps_per_second': 19.396}
Evaluating model: ./models_Mlm_10_Chemberta/4fb63oti


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mlm_10_Chemberta/4fb63oti: {'eval_mcc_metric': 0.740909053754227, 'eval_loss': 0.18313008546829224, 'eval_model_preparation_time': 0.0037, 'eval_Accuracy': 0.8615794143744454, 'eval_AUC-ROC': 0.9283378355112839, 'eval_Precision': 0.8270258145704948, 'eval_Recall': 0.6637824095302897, 'eval_F1-score': 0.685709578115499, 'eval_runtime': 3.5679, 'eval_samples_per_second': 631.747, 'eval_steps_per_second': 19.9}
Evaluating model: ./models_Mlm_10_Chemberta/gfgudjue


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mlm_10_Chemberta/gfgudjue: {'eval_mcc_metric': 0.754609633972367, 'eval_loss': 0.16939516365528107, 'eval_model_preparation_time': 0.0038, 'eval_Accuracy': 0.8717834960070985, 'eval_AUC-ROC': 0.9374650956515712, 'eval_Precision': 0.8552058451573432, 'eval_Recall': 0.661818253483307, 'eval_F1-score': 0.6959998780471583, 'eval_runtime': 3.6029, 'eval_samples_per_second': 625.612, 'eval_steps_per_second': 19.707}
Evaluating model: ./models_Mlm_10_Chemberta/e374cax6


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mlm_10_Chemberta/e374cax6: {'eval_mcc_metric': 0.7593566052351548, 'eval_loss': 0.17612887918949127, 'eval_model_preparation_time': 0.0038, 'eval_Accuracy': 0.8735581188997338, 'eval_AUC-ROC': 0.9329317680677949, 'eval_Precision': 0.8597654326088628, 'eval_Recall': 0.6692281948095298, 'eval_F1-score': 0.7013560816174796, 'eval_runtime': 3.8342, 'eval_samples_per_second': 587.872, 'eval_steps_per_second': 18.518}
Evaluating model: ./models_Mlm_10_Chemberta/8jzpzyyg


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mlm_10_Chemberta/8jzpzyyg: {'eval_mcc_metric': 0.7606406163740841, 'eval_loss': 0.16965076327323914, 'eval_model_preparation_time': 0.0038, 'eval_Accuracy': 0.8744454303460515, 'eval_AUC-ROC': 0.9446570255887197, 'eval_Precision': 0.8584451053943859, 'eval_Recall': 0.6708489711002195, 'eval_F1-score': 0.7026687834980267, 'eval_runtime': 3.6722, 'eval_samples_per_second': 613.805, 'eval_steps_per_second': 19.335}
Evaluating model: ./models_Mlm_10_Chemberta/ao6l1ar5


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mlm_10_Chemberta/ao6l1ar5: {'eval_mcc_metric': 0.742340558564735, 'eval_loss': 0.18377947807312012, 'eval_model_preparation_time': 0.0038, 'eval_Accuracy': 0.8624667258207631, 'eval_AUC-ROC': 0.9242068808410323, 'eval_Precision': 0.8273666539646651, 'eval_Recall': 0.6641823066854988, 'eval_F1-score': 0.6860684003517648, 'eval_runtime': 3.6803, 'eval_samples_per_second': 612.451, 'eval_steps_per_second': 19.292}
Evaluating model: ./models_Mlm_10_Chemberta/29mn1i36


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mlm_10_Chemberta/29mn1i36: {'eval_mcc_metric': 0.7648311331409482, 'eval_loss': 0.1637691706418991, 'eval_model_preparation_time': 0.0038, 'eval_Accuracy': 0.8771073646850044, 'eval_AUC-ROC': 0.9352994369006513, 'eval_Precision': 0.8635057662593392, 'eval_Recall': 0.6714753965682861, 'eval_F1-score': 0.7058746215029943, 'eval_runtime': 3.6354, 'eval_samples_per_second': 620.01, 'eval_steps_per_second': 19.53}
Evaluating model: ./models_Mlm_10_Chemberta/v1e93rjp


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mlm_10_Chemberta/v1e93rjp: {'eval_mcc_metric': 0.7307316602577645, 'eval_loss': 0.19169306755065918, 'eval_model_preparation_time': 0.0037, 'eval_Accuracy': 0.8589174800354925, 'eval_AUC-ROC': 0.9121644495936069, 'eval_Precision': 0.8309833144548868, 'eval_Recall': 0.6498413016992705, 'eval_F1-score': 0.6802427550062317, 'eval_runtime': 3.5312, 'eval_samples_per_second': 638.311, 'eval_steps_per_second': 20.107}
Evaluating model: ./models_Mlm_10_Chemberta/4lskz3k4


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mlm_10_Chemberta/4lskz3k4: {'eval_mcc_metric': 0.7349980888326919, 'eval_loss': 0.19541019201278687, 'eval_model_preparation_time': 0.0038, 'eval_Accuracy': 0.8593611357586513, 'eval_AUC-ROC': 0.9138323763599596, 'eval_Precision': 0.7295496767165879, 'eval_Recall': 0.6574871971495032, 'eval_F1-score': 0.6758098650437983, 'eval_runtime': 3.6219, 'eval_samples_per_second': 622.322, 'eval_steps_per_second': 19.603}
Evaluating model: ./models_Mlm_10_Chemberta/6ocfxa7u


Test Results for ./models_Mlm_10_Chemberta/6ocfxa7u: {'eval_mcc_metric': 0.7414589304407849, 'eval_loss': 0.1874830275774002, 'eval_model_preparation_time': 0.0038, 'eval_Accuracy': 0.8633540372670807, 'eval_AUC-ROC': 0.9239020585135893, 'eval_Precision': 0.833281359845302, 'eval_Recall': 0.661965819696989, 'eval_F1-score': 0.6879138746837524, 'eval_runtime': 3.8368, 'eval_samples_per_second': 587.463, 'eval_steps_per_second': 18.505}


### Checking for all 5M MTR models

In [30]:
base_model = AutoModelForSequenceClassification.from_pretrained(
    "DeepChem/ChemBERTa-5M-MTR",
    num_labels=5,
    problem_type="single_label_classification",    
    trust_remote_code=True,
    
)

tokenizer = AutoTokenizer.from_pretrained("DeepChem/ChemBERTa-5M-MTR",trust_remote_code=True)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-5M-MTR and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [31]:
from peft import PeftModel  
import os

models_dir = "./models_Mtr_5_Chemberta"

for ckpt in os.listdir(models_dir):
    model_path = os.path.join(models_dir, ckpt)
    
    if os.path.isdir(model_path):
        adapter_config_path = os.path.join(model_path, "adapter_config.json")
        
        if not os.path.exists(adapter_config_path):
            print(f"⚠️ Missing 'adapter_config.json' in {model_path}")

# Evaluate each saved model

valid_checkpoints = [
    os.path.join(models_dir, ckpt)
    for ckpt in os.listdir(models_dir)
    if os.path.isdir(os.path.join(models_dir, ckpt)) and 
       os.path.exists(os.path.join(models_dir, ckpt, "adapter_config.json"))
]

print("Valid Model Checkpoints:", valid_checkpoints)

for model_path in valid_checkpoints:
    print(f"Evaluating model: {model_path}")

    adapter_model = PeftModel.from_pretrained(base_model, model_path)
    adapter_model.eval()

    trainer = CustomTrainer(
        model=adapter_model,
        args=training_args,
        eval_dataset=test_dataset,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics
    )

    test_results_clin = trainer.evaluate()
    print(f"Test Results for {model_path}: {test_results_clin}")

  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


⚠️ Missing 'adapter_config.json' in ./models_Mtr_5_Chemberta/84cefzee
Valid Model Checkpoints: ['./models_Mtr_5_Chemberta/eeuaszyg', './models_Mtr_5_Chemberta/wvxxdsok', './models_Mtr_5_Chemberta/hfjmj8af', './models_Mtr_5_Chemberta/i5zckto9', './models_Mtr_5_Chemberta/24p9zgdn', './models_Mtr_5_Chemberta/io1c4ug5', './models_Mtr_5_Chemberta/nq42s50m', './models_Mtr_5_Chemberta/lfvffslg', './models_Mtr_5_Chemberta/4p2kl0le', './models_Mtr_5_Chemberta/ret7ydyc', './models_Mtr_5_Chemberta/v773pzco', './models_Mtr_5_Chemberta/qlr8qiyn', './models_Mtr_5_Chemberta/7lx28l2q', './models_Mtr_5_Chemberta/ndgq1b2l', './models_Mtr_5_Chemberta/0n7q4g0x', './models_Mtr_5_Chemberta/s53s98co', './models_Mtr_5_Chemberta/r7mxurpz', './models_Mtr_5_Chemberta/7fpw74gq', './models_Mtr_5_Chemberta/1uc5cs8k', './models_Mtr_5_Chemberta/qdtwt30e', './models_Mtr_5_Chemberta/sj3yydqn', './models_Mtr_5_Chemberta/cgefgnd3', './models_Mtr_5_Chemberta/5x17bw96', './models_Mtr_5_Chemberta/j92alz30']
Evaluating model

  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mtr_5_Chemberta/eeuaszyg: {'eval_mcc_metric': 0.0651055566688356, 'eval_loss': 1.035671353340149, 'eval_model_preparation_time': 0.0034, 'eval_Accuracy': 0.2684117125110914, 'eval_AUC-ROC': 0.6048832895929168, 'eval_Precision': 0.26998547654108057, 'eval_Recall': 0.2531751674551164, 'eval_F1-score': 0.1977021888255427, 'eval_runtime': 3.5781, 'eval_samples_per_second': 629.947, 'eval_steps_per_second': 19.843}
Evaluating model: ./models_Mtr_5_Chemberta/wvxxdsok


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mtr_5_Chemberta/wvxxdsok: {'eval_mcc_metric': 0.787005962950063, 'eval_loss': 0.1475902944803238, 'eval_model_preparation_time': 0.0038, 'eval_Accuracy': 0.8873114463176575, 'eval_AUC-ROC': 0.9503441275025594, 'eval_Precision': 0.7425513484941237, 'eval_Recall': 0.7574074614311995, 'eval_F1-score': 0.7476545706894304, 'eval_runtime': 3.6807, 'eval_samples_per_second': 612.378, 'eval_steps_per_second': 19.29}
Evaluating model: ./models_Mtr_5_Chemberta/hfjmj8af


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mtr_5_Chemberta/hfjmj8af: {'eval_mcc_metric': 0.7530232352002739, 'eval_loss': 0.16767391562461853, 'eval_model_preparation_time': 0.0038, 'eval_Accuracy': 0.8686779059449867, 'eval_AUC-ROC': 0.9438039631651737, 'eval_Precision': 0.7729346410476363, 'eval_Recall': 0.7038889021063186, 'eval_F1-score': 0.7271120810644203, 'eval_runtime': 3.5678, 'eval_samples_per_second': 631.769, 'eval_steps_per_second': 19.9}
Evaluating model: ./models_Mtr_5_Chemberta/i5zckto9


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mtr_5_Chemberta/i5zckto9: {'eval_mcc_metric': 0.7390312690688539, 'eval_loss': 0.17914626002311707, 'eval_model_preparation_time': 0.0039, 'eval_Accuracy': 0.8620230700976043, 'eval_AUC-ROC': 0.947685470067124, 'eval_Precision': 0.6998397083794436, 'eval_Recall': 0.6616486525427223, 'eval_F1-score': 0.67502951197281, 'eval_runtime': 3.8152, 'eval_samples_per_second': 590.8, 'eval_steps_per_second': 18.61}
Evaluating model: ./models_Mtr_5_Chemberta/24p9zgdn


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mtr_5_Chemberta/24p9zgdn: {'eval_mcc_metric': 0.7519646779400456, 'eval_loss': 0.17050717771053314, 'eval_model_preparation_time': 0.0038, 'eval_Accuracy': 0.8682342502218279, 'eval_AUC-ROC': 0.9496293885338144, 'eval_Precision': 0.7855679533902455, 'eval_Recall': 0.7347662492359218, 'eval_F1-score': 0.7551395927857127, 'eval_runtime': 3.556, 'eval_samples_per_second': 633.854, 'eval_steps_per_second': 19.966}
Evaluating model: ./models_Mtr_5_Chemberta/io1c4ug5


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mtr_5_Chemberta/io1c4ug5: {'eval_mcc_metric': 0.7599998573532042, 'eval_loss': 0.162087544798851, 'eval_model_preparation_time': 0.0038, 'eval_Accuracy': 0.8740017746228926, 'eval_AUC-ROC': 0.9627824893460163, 'eval_Precision': 0.7246371076223455, 'eval_Recall': 0.6698514003493337, 'eval_F1-score': 0.6888297999620836, 'eval_runtime': 3.5605, 'eval_samples_per_second': 633.056, 'eval_steps_per_second': 19.941}
Evaluating model: ./models_Mtr_5_Chemberta/nq42s50m


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mtr_5_Chemberta/nq42s50m: {'eval_mcc_metric': 0.7565117878623016, 'eval_loss': 0.1742604374885559, 'eval_model_preparation_time': 0.0039, 'eval_Accuracy': 0.8713398402839396, 'eval_AUC-ROC': 0.9453843378202462, 'eval_Precision': 0.6938010237202412, 'eval_Recall': 0.6710703721285134, 'eval_F1-score': 0.6806088063790013, 'eval_runtime': 3.6609, 'eval_samples_per_second': 615.703, 'eval_steps_per_second': 19.394}
Evaluating model: ./models_Mtr_5_Chemberta/lfvffslg


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mtr_5_Chemberta/lfvffslg: {'eval_mcc_metric': 0.7486256687843692, 'eval_loss': 0.17598804831504822, 'eval_model_preparation_time': 0.0039, 'eval_Accuracy': 0.8673469387755102, 'eval_AUC-ROC': 0.94431638374553, 'eval_Precision': 0.6899161468025053, 'eval_Recall': 0.6655308585249371, 'eval_F1-score': 0.6757995478934248, 'eval_runtime': 3.66, 'eval_samples_per_second': 615.841, 'eval_steps_per_second': 19.399}
Evaluating model: ./models_Mtr_5_Chemberta/4p2kl0le


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mtr_5_Chemberta/4p2kl0le: {'eval_mcc_metric': 0.7922832370919513, 'eval_loss': 0.13667497038841248, 'eval_model_preparation_time': 0.0039, 'eval_Accuracy': 0.8895297249334516, 'eval_AUC-ROC': 0.958335401296123, 'eval_Precision': 0.767476398219038, 'eval_Recall': 0.7331228786790375, 'eval_F1-score': 0.7469094345185043, 'eval_runtime': 3.6615, 'eval_samples_per_second': 615.592, 'eval_steps_per_second': 19.391}
Evaluating model: ./models_Mtr_5_Chemberta/ret7ydyc


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mtr_5_Chemberta/ret7ydyc: {'eval_mcc_metric': 0.7494264204699775, 'eval_loss': 0.1744409203529358, 'eval_model_preparation_time': 0.0039, 'eval_Accuracy': 0.8673469387755102, 'eval_AUC-ROC': 0.9435746087941403, 'eval_Precision': 0.6908493771629605, 'eval_Recall': 0.6685243613426962, 'eval_F1-score': 0.6776288225484517, 'eval_runtime': 3.8181, 'eval_samples_per_second': 590.34, 'eval_steps_per_second': 18.595}
Evaluating model: ./models_Mtr_5_Chemberta/v773pzco


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mtr_5_Chemberta/v773pzco: {'eval_mcc_metric': 0.779754385910448, 'eval_loss': 0.1472068578004837, 'eval_model_preparation_time': 0.0039, 'eval_Accuracy': 0.8819875776397516, 'eval_AUC-ROC': 0.9511220264515533, 'eval_Precision': 0.7979816011855128, 'eval_Recall': 0.8239869972179441, 'eval_F1-score': 0.8099201205367065, 'eval_runtime': 3.5807, 'eval_samples_per_second': 629.481, 'eval_steps_per_second': 19.828}
Evaluating model: ./models_Mtr_5_Chemberta/qlr8qiyn


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mtr_5_Chemberta/qlr8qiyn: {'eval_mcc_metric': 0.7400455526713832, 'eval_loss': 0.16639114916324615, 'eval_model_preparation_time': 0.0038, 'eval_Accuracy': 0.8655723158828749, 'eval_AUC-ROC': 0.9611121483370189, 'eval_Precision': 0.8545922819696455, 'eval_Recall': 0.6497295959522315, 'eval_F1-score': 0.6898779184753373, 'eval_runtime': 3.5626, 'eval_samples_per_second': 632.679, 'eval_steps_per_second': 19.929}
Evaluating model: ./models_Mtr_5_Chemberta/7lx28l2q


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mtr_5_Chemberta/7lx28l2q: {'eval_mcc_metric': 0.7566391514968037, 'eval_loss': 0.15640893578529358, 'eval_model_preparation_time': 0.0038, 'eval_Accuracy': 0.8726708074534162, 'eval_AUC-ROC': 0.9525040553150884, 'eval_Precision': 0.8597779745883493, 'eval_Recall': 0.6990214916973706, 'eval_F1-score': 0.7433055470674511, 'eval_runtime': 3.6618, 'eval_samples_per_second': 615.54, 'eval_steps_per_second': 19.389}
Evaluating model: ./models_Mtr_5_Chemberta/ndgq1b2l


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mtr_5_Chemberta/ndgq1b2l: {'eval_mcc_metric': 0.7632103045045602, 'eval_loss': 0.15403041243553162, 'eval_model_preparation_time': 0.0038, 'eval_Accuracy': 0.8757763975155279, 'eval_AUC-ROC': 0.9535849490089972, 'eval_Precision': 0.8579272743363477, 'eval_Recall': 0.7040038735293056, 'eval_F1-score': 0.7462833925145368, 'eval_runtime': 3.5805, 'eval_samples_per_second': 629.519, 'eval_steps_per_second': 19.83}
Evaluating model: ./models_Mtr_5_Chemberta/0n7q4g0x


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mtr_5_Chemberta/0n7q4g0x: {'eval_mcc_metric': 0.7659941672257725, 'eval_loss': 0.16212618350982666, 'eval_model_preparation_time': 0.0039, 'eval_Accuracy': 0.8753327417923691, 'eval_AUC-ROC': 0.9407773362619973, 'eval_Precision': 0.7587025191827304, 'eval_Recall': 0.6803051938961573, 'eval_F1-score': 0.7004362841605393, 'eval_runtime': 3.568, 'eval_samples_per_second': 631.728, 'eval_steps_per_second': 19.899}
Evaluating model: ./models_Mtr_5_Chemberta/s53s98co


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mtr_5_Chemberta/s53s98co: {'eval_mcc_metric': 0.775463883472275, 'eval_loss': 0.14482459425926208, 'eval_model_preparation_time': 0.0039, 'eval_Accuracy': 0.8819875776397516, 'eval_AUC-ROC': 0.9687997751692006, 'eval_Precision': 0.7975165993506426, 'eval_Recall': 0.7138352326264439, 'eval_F1-score': 0.7438332882084613, 'eval_runtime': 3.6569, 'eval_samples_per_second': 616.373, 'eval_steps_per_second': 19.415}
Evaluating model: ./models_Mtr_5_Chemberta/r7mxurpz


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mtr_5_Chemberta/r7mxurpz: {'eval_mcc_metric': 0.7705100325411581, 'eval_loss': 0.1532725840806961, 'eval_model_preparation_time': 0.0038, 'eval_Accuracy': 0.878438331854481, 'eval_AUC-ROC': 0.9666861261421982, 'eval_Precision': 0.7843936777647682, 'eval_Recall': 0.7152037431982745, 'eval_F1-score': 0.7384913439108391, 'eval_runtime': 3.5737, 'eval_samples_per_second': 630.719, 'eval_steps_per_second': 19.867}
Evaluating model: ./models_Mtr_5_Chemberta/7fpw74gq


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mtr_5_Chemberta/7fpw74gq: {'eval_mcc_metric': 0.7406668010187301, 'eval_loss': 0.16286855936050415, 'eval_model_preparation_time': 0.0038, 'eval_Accuracy': 0.8642413487133984, 'eval_AUC-ROC': 0.9531267111494071, 'eval_Precision': 0.7478656197722325, 'eval_Recall': 0.6559093159663213, 'eval_F1-score': 0.682981811266985, 'eval_runtime': 3.545, 'eval_samples_per_second': 635.817, 'eval_steps_per_second': 20.028}
Evaluating model: ./models_Mtr_5_Chemberta/1uc5cs8k


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mtr_5_Chemberta/1uc5cs8k: {'eval_mcc_metric': 0.7558654399577743, 'eval_loss': 0.16676174104213715, 'eval_model_preparation_time': 0.0039, 'eval_Accuracy': 0.8708961845607809, 'eval_AUC-ROC': 0.9494609925414753, 'eval_Precision': 0.7128965792860746, 'eval_Recall': 0.6683716678444505, 'eval_F1-score': 0.6832693558897273, 'eval_runtime': 3.8096, 'eval_samples_per_second': 591.661, 'eval_steps_per_second': 18.637}
Evaluating model: ./models_Mtr_5_Chemberta/qdtwt30e


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mtr_5_Chemberta/qdtwt30e: {'eval_mcc_metric': 0.7458383301400656, 'eval_loss': 0.16902202367782593, 'eval_model_preparation_time': 0.0038, 'eval_Accuracy': 0.8646850044365573, 'eval_AUC-ROC': 0.9434091870347826, 'eval_Precision': 0.7325238477555442, 'eval_Recall': 0.6664578143894739, 'eval_F1-score': 0.6826702686592011, 'eval_runtime': 3.5763, 'eval_samples_per_second': 630.26, 'eval_steps_per_second': 19.853}
Evaluating model: ./models_Mtr_5_Chemberta/sj3yydqn


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mtr_5_Chemberta/sj3yydqn: {'eval_mcc_metric': 0.7884615118588195, 'eval_loss': 0.13780535757541656, 'eval_model_preparation_time': 0.0038, 'eval_Accuracy': 0.8877551020408163, 'eval_AUC-ROC': 0.9696235837571147, 'eval_Precision': 0.7673452556415395, 'eval_Recall': 0.7286581383632396, 'eval_F1-score': 0.7435661153633066, 'eval_runtime': 3.6604, 'eval_samples_per_second': 615.779, 'eval_steps_per_second': 19.397}
Evaluating model: ./models_Mtr_5_Chemberta/cgefgnd3


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mtr_5_Chemberta/cgefgnd3: {'eval_mcc_metric': 0.7722590521446594, 'eval_loss': 0.14912612736225128, 'eval_model_preparation_time': 0.0039, 'eval_Accuracy': 0.8802129547471162, 'eval_AUC-ROC': 0.9589468942440134, 'eval_Precision': 0.76052944188729, 'eval_Recall': 0.7119211833907053, 'eval_F1-score': 0.7318655197216989, 'eval_runtime': 3.658, 'eval_samples_per_second': 616.179, 'eval_steps_per_second': 19.409}
Evaluating model: ./models_Mtr_5_Chemberta/5x17bw96


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mtr_5_Chemberta/5x17bw96: {'eval_mcc_metric': 0.732940156486814, 'eval_loss': 0.1951296329498291, 'eval_model_preparation_time': 0.0038, 'eval_Accuracy': 0.8589174800354925, 'eval_AUC-ROC': 0.9232400794964539, 'eval_Precision': 0.6351294160479245, 'eval_Recall': 0.6204615992745854, 'eval_F1-score': 0.6239080423384326, 'eval_runtime': 3.5885, 'eval_samples_per_second': 628.116, 'eval_steps_per_second': 19.785}
Evaluating model: ./models_Mtr_5_Chemberta/j92alz30


Test Results for ./models_Mtr_5_Chemberta/j92alz30: {'eval_mcc_metric': 0.5544837921085852, 'eval_loss': 0.31705886125564575, 'eval_model_preparation_time': 0.0039, 'eval_Accuracy': 0.782608695652174, 'eval_AUC-ROC': 0.8121271977935832, 'eval_Precision': 0.586801825020637, 'eval_Recall': 0.46280637664456503, 'eval_F1-score': 0.49256044259750764, 'eval_runtime': 3.5959, 'eval_samples_per_second': 626.832, 'eval_steps_per_second': 19.745}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


### Checking for checkpoints of 5M MTR 

In [96]:
from peft import PeftModel  

adapter_model = PeftModel.from_pretrained(base_model, "/home/raghvendra2/Molformer_Finetuning/models_Mtr_5_Chemberta/ndgq1b2l/checkpoint-1645")



In [97]:
trainer = CustomTrainer(
        model=adapter_model,
        args=training_args,
        eval_dataset=test_dataset,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics
    )

  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [98]:
test_results_flavor = trainer.evaluate()

print("Test Results for checkpoint 1645 of sj3yydqn:", test_results_flavor)

Test Results for checkpoint 1645 of sj3yydqn: {'eval_mcc_metric': 0.7810350014174989, 'eval_loss': 0.1457151621580124, 'eval_model_preparation_time': 0.0043, 'eval_Accuracy': 0.8846495119787046, 'eval_AUC-ROC': 0.9527411756627775, 'eval_Precision': 0.7988920909771744, 'eval_Recall': 0.7210630577180025, 'eval_F1-score': 0.7486025273539199, 'eval_runtime': 4.7489, 'eval_samples_per_second': 474.638, 'eval_steps_per_second': 14.951}


### 10MLM Model performance

In [75]:
base_model = AutoModelForSequenceClassification.from_pretrained(
    "DeepChem/ChemBERTa-77M-MTR",
    num_labels=5,
    problem_type="single_label_classification",    
    trust_remote_code=True,
    
)

tokenizer = AutoTokenizer.from_pretrained("DeepChem/ChemBERTa-77M-MTR",trust_remote_code=True)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MTR and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [76]:

import os



for ckpt in os.listdir(models_dir):import os
from peft import PeftModel

models_dir = "./models_MTR100_Chemberta"

def find_all_checkpoints(base_dir):
    all_checkpoints = []
    for folder in os.listdir(base_dir):
        folder_path = os.path.join(base_dir, folder)
        if os.path.isdir(folder_path):
            # Now look inside this folder for checkpoint-* subdirs
            for subfolder in os.listdir(folder_path):
                subfolder_path = os.path.join(folder_path, subfolder)
                if os.path.isdir(subfolder_path) and subfolder.startswith("checkpoint-"):
                    # Check for adapter_config.json
                    if os.path.exists(os.path.join(subfolder_path, "adapter_config.json")):
                        all_checkpoints.append(subfolder_path)
    return all_checkpoints

valid_checkpoints = find_all_checkpoints(models_dir)
print("🧠 Valid nested checkpoints found:", valid_checkpoints)

for checkpoint_path in valid_checkpoints:
    checkpoint_name = os.path.basename(checkpoint_path)
    parent_folder = os.path.basename(os.path.dirname(checkpoint_path))

    print(f"\n🔍 Evaluating model: {parent_folder}/{checkpoint_name}")

    adapter_model = PeftModel.from_pretrained(base_model, checkpoint_path)
    adapter_model.eval()

    trainer = CustomTrainer(
        model=adapter_model,
        args=training_args,
        eval_dataset=test_dataset,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics
    )

    test_results_clin = trainer.evaluate()

    print(f"📌 Test Results for {parent_folder}/{checkpoint_name}: {test_results_clin}")

    model_path = os.path.join(models_dir, ckpt)
    
    if os.path.isdir(model_path):
        adapter_config_path = os.path.join(model_path, "adapter_config.json")
        
        if not os.path.exists(adapter_config_path):
            print(f"⚠️ Missing 'adapter_config.json' in {model_path}")

# Evaluate each saved model

valid_checkpoints = [
    os.path.join(models_dir, ckpt)
    for ckpt in os.listdir(models_dir)
    if os.path.isdir(os.path.join(models_dir, ckpt)) and 
       os.path.exists(os.path.join(models_dir, ckpt, "adapter_config.json"))
]

print("Valid Model Checkpoints:", valid_checkpoints)

for model_path in valid_checkpoints:
    print(f"Evaluating model: {model_path}")

    adapter_model = PeftModel.from_pretrained(base_model, model_path)
    adapter_model.eval()

    trainer = CustomTrainer(
        model=adapter_model,
        args=training_args,
        eval_dataset=test_dataset,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics
    )

    test_results_clin = trainer.evaluate()
    print(f"Test Results for {model_path}: {test_results_clin}")

  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


🧠 Valid nested checkpoints found: ['./models_MTR100_Chemberta/ujye8pur/checkpoint-500', './models_MTR100_Chemberta/glzr8jim/checkpoint-1500', './models_MTR100_Chemberta/glzr8jim/checkpoint-1000', './models_MTR100_Chemberta/glzr8jim/checkpoint-500', './models_MTR100_Chemberta/glzr8jim/checkpoint-1645', './models_MTR100_Chemberta/y6ioij00/checkpoint-1500', './models_MTR100_Chemberta/y6ioij00/checkpoint-1000', './models_MTR100_Chemberta/y6ioij00/checkpoint-500', './models_MTR100_Chemberta/y6ioij00/checkpoint-1645', './models_MTR100_Chemberta/5k0m1ybg/checkpoint-1500', './models_MTR100_Chemberta/5k0m1ybg/checkpoint-1000', './models_MTR100_Chemberta/5k0m1ybg/checkpoint-500', './models_MTR100_Chemberta/5k0m1ybg/checkpoint-1645', './models_MTR100_Chemberta/imo6nm9x/checkpoint-1500', './models_MTR100_Chemberta/imo6nm9x/checkpoint-1000', './models_MTR100_Chemberta/imo6nm9x/checkpoint-500', './models_MTR100_Chemberta/imo6nm9x/checkpoint-1645', './models_MTR100_Chemberta/to3uvb1h/checkpoint-1500'

  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for ujye8pur/checkpoint-500: {'eval_mcc_metric': 0.19421155282376865, 'eval_loss': 0.9777860045433044, 'eval_Accuracy': 0.48535936113575867, 'eval_AUC-ROC': 0.6205852690220697, 'eval_Precision': 0.2693402384519366, 'eval_Recall': 0.32627241350001607, 'eval_F1-score': 0.236805625235501, 'eval_runtime': 2.9214, 'eval_samples_per_second': 771.552, 'eval_steps_per_second': 6.161}

🔍 Evaluating model: glzr8jim/checkpoint-1500


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for glzr8jim/checkpoint-1500: {'eval_mcc_metric': 0.7834343375170396, 'eval_loss': 0.13969959318637848, 'eval_Accuracy': 0.8855368234250222, 'eval_AUC-ROC': 0.9549969661367305, 'eval_Precision': 0.7988102389221967, 'eval_Recall': 0.7907065709696487, 'eval_F1-score': 0.7944742733644798, 'eval_runtime': 3.3104, 'eval_samples_per_second': 680.883, 'eval_steps_per_second': 5.437}

🔍 Evaluating model: glzr8jim/checkpoint-1000


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for glzr8jim/checkpoint-1000: {'eval_mcc_metric': 0.7561777901437662, 'eval_loss': 0.157387375831604, 'eval_Accuracy': 0.8731144631765749, 'eval_AUC-ROC': 0.9607157091940854, 'eval_Precision': 0.7586056774144084, 'eval_Recall': 0.6631932680711806, 'eval_F1-score': 0.6919552713312738, 'eval_runtime': 2.9928, 'eval_samples_per_second': 753.14, 'eval_steps_per_second': 6.014}

🔍 Evaluating model: glzr8jim/checkpoint-500


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for glzr8jim/checkpoint-500: {'eval_mcc_metric': 0.7502513988290391, 'eval_loss': 0.18352656066417694, 'eval_Accuracy': 0.865128660159716, 'eval_AUC-ROC': 0.9459783526571478, 'eval_Precision': 0.732178217647446, 'eval_Recall': 0.6760370566249214, 'eval_F1-score': 0.6856137635277199, 'eval_runtime': 2.9859, 'eval_samples_per_second': 754.879, 'eval_steps_per_second': 6.028}

🔍 Evaluating model: glzr8jim/checkpoint-1645


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for glzr8jim/checkpoint-1645: {'eval_mcc_metric': 0.7857732041076384, 'eval_loss': 0.13933634757995605, 'eval_Accuracy': 0.8864241348713399, 'eval_AUC-ROC': 0.9560709660479298, 'eval_Precision': 0.80451841570831, 'eval_Recall': 0.7921601853640271, 'eval_F1-score': 0.7971128505954014, 'eval_runtime': 3.3059, 'eval_samples_per_second': 681.806, 'eval_steps_per_second': 5.445}

🔍 Evaluating model: y6ioij00/checkpoint-1500


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for y6ioij00/checkpoint-1500: {'eval_mcc_metric': 0.7777237717003995, 'eval_loss': 0.13908487558364868, 'eval_Accuracy': 0.8824312333629104, 'eval_AUC-ROC': 0.9680355972198766, 'eval_Precision': 0.8615301215900842, 'eval_Recall': 0.755471099116446, 'eval_F1-score': 0.7915827963618229, 'eval_runtime': 2.995, 'eval_samples_per_second': 752.576, 'eval_steps_per_second': 6.01}

🔍 Evaluating model: y6ioij00/checkpoint-1000


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for y6ioij00/checkpoint-1000: {'eval_mcc_metric': 0.7412615894514512, 'eval_loss': 0.15623323619365692, 'eval_Accuracy': 0.8664596273291926, 'eval_AUC-ROC': 0.9657289955449591, 'eval_Precision': 0.8556133179258886, 'eval_Recall': 0.6498670110123272, 'eval_F1-score': 0.6913256232325058, 'eval_runtime': 2.9885, 'eval_samples_per_second': 754.219, 'eval_steps_per_second': 6.023}

🔍 Evaluating model: y6ioij00/checkpoint-500


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for y6ioij00/checkpoint-500: {'eval_mcc_metric': 0.7432949433893207, 'eval_loss': 0.17865656316280365, 'eval_Accuracy': 0.8615794143744454, 'eval_AUC-ROC': 0.9521815768854778, 'eval_Precision': 0.6944408816073097, 'eval_Recall': 0.6691210670043418, 'eval_F1-score': 0.6751547117438224, 'eval_runtime': 2.9818, 'eval_samples_per_second': 755.93, 'eval_steps_per_second': 6.037}

🔍 Evaluating model: y6ioij00/checkpoint-1645


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for y6ioij00/checkpoint-1645: {'eval_mcc_metric': 0.7799976714498148, 'eval_loss': 0.1390303522348404, 'eval_Accuracy': 0.883318544809228, 'eval_AUC-ROC': 0.9668998755927947, 'eval_Precision': 0.8650642614069216, 'eval_Recall': 0.7556980372899021, 'eval_F1-score': 0.7927498574132986, 'eval_runtime': 2.9755, 'eval_samples_per_second': 757.529, 'eval_steps_per_second': 6.049}

🔍 Evaluating model: 5k0m1ybg/checkpoint-1500


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for 5k0m1ybg/checkpoint-1500: {'eval_mcc_metric': 0.7861823500814078, 'eval_loss': 0.1503613293170929, 'eval_Accuracy': 0.8868677905944987, 'eval_AUC-ROC': 0.9555982712852101, 'eval_Precision': 0.7837498821907072, 'eval_Recall': 0.7571685483823567, 'eval_F1-score': 0.7692619624826265, 'eval_runtime': 3.2681, 'eval_samples_per_second': 689.702, 'eval_steps_per_second': 5.508}

🔍 Evaluating model: 5k0m1ybg/checkpoint-1000


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for 5k0m1ybg/checkpoint-1000: {'eval_mcc_metric': 0.7528682456747839, 'eval_loss': 0.1641027331352234, 'eval_Accuracy': 0.8717834960070985, 'eval_AUC-ROC': 0.9484502428965049, 'eval_Precision': 0.8560778135534199, 'eval_Recall': 0.6621800926560468, 'eval_F1-score': 0.6980971135064987, 'eval_runtime': 2.982, 'eval_samples_per_second': 755.863, 'eval_steps_per_second': 6.036}

🔍 Evaluating model: 5k0m1ybg/checkpoint-500


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for 5k0m1ybg/checkpoint-500: {'eval_mcc_metric': 0.7536075650181511, 'eval_loss': 0.18033510446548462, 'eval_Accuracy': 0.867790594498669, 'eval_AUC-ROC': 0.9435619422200482, 'eval_Precision': 0.749063591964226, 'eval_Recall': 0.6726338501362976, 'eval_F1-score': 0.6904755347758996, 'eval_runtime': 2.9422, 'eval_samples_per_second': 766.1, 'eval_steps_per_second': 6.118}

🔍 Evaluating model: 5k0m1ybg/checkpoint-1645


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for 5k0m1ybg/checkpoint-1645: {'eval_mcc_metric': 0.7947615088853518, 'eval_loss': 0.14564938843250275, 'eval_Accuracy': 0.8913043478260869, 'eval_AUC-ROC': 0.9556414960002446, 'eval_Precision': 0.7784190145841964, 'eval_Recall': 0.763899668305314, 'eval_F1-score': 0.7692187918551457, 'eval_runtime': 2.951, 'eval_samples_per_second': 763.808, 'eval_steps_per_second': 6.1}

🔍 Evaluating model: imo6nm9x/checkpoint-1500


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for imo6nm9x/checkpoint-1500: {'eval_mcc_metric': 0.7818169758422309, 'eval_loss': 0.1437530368566513, 'eval_Accuracy': 0.8850931677018633, 'eval_AUC-ROC': 0.9721605705286832, 'eval_Precision': 0.8250201990356656, 'eval_Recall': 0.7875592578624443, 'eval_F1-score': 0.8045247463831717, 'eval_runtime': 3.2846, 'eval_samples_per_second': 686.23, 'eval_steps_per_second': 5.48}

🔍 Evaluating model: imo6nm9x/checkpoint-1000


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for imo6nm9x/checkpoint-1000: {'eval_mcc_metric': 0.7505830110360489, 'eval_loss': 0.16083496809005737, 'eval_Accuracy': 0.870452528837622, 'eval_AUC-ROC': 0.9649679384016391, 'eval_Precision': 0.8573939402955351, 'eval_Recall': 0.6589279419307597, 'eval_F1-score': 0.6970335804335622, 'eval_runtime': 2.9609, 'eval_samples_per_second': 761.255, 'eval_steps_per_second': 6.079}

🔍 Evaluating model: imo6nm9x/checkpoint-500


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for imo6nm9x/checkpoint-500: {'eval_mcc_metric': 0.7526104369480295, 'eval_loss': 0.1792353391647339, 'eval_Accuracy': 0.867790594498669, 'eval_AUC-ROC': 0.9531487492655296, 'eval_Precision': 0.6839922258902702, 'eval_Recall': 0.6768026837451615, 'eval_F1-score': 0.6792147873649024, 'eval_runtime': 3.2876, 'eval_samples_per_second': 685.599, 'eval_steps_per_second': 5.475}

🔍 Evaluating model: imo6nm9x/checkpoint-1645


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for imo6nm9x/checkpoint-1645: {'eval_mcc_metric': 0.7882497847517572, 'eval_loss': 0.14135079085826874, 'eval_Accuracy': 0.8873114463176575, 'eval_AUC-ROC': 0.9720325211171271, 'eval_Precision': 0.8327510094500526, 'eval_Recall': 0.8274682544764185, 'eval_F1-score': 0.8297152462405265, 'eval_runtime': 2.9562, 'eval_samples_per_second': 762.455, 'eval_steps_per_second': 6.089}

🔍 Evaluating model: to3uvb1h/checkpoint-1500


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for to3uvb1h/checkpoint-1500: {'eval_mcc_metric': 0.777782885687969, 'eval_loss': 0.1434350311756134, 'eval_Accuracy': 0.883318544809228, 'eval_AUC-ROC': 0.9714154098726826, 'eval_Precision': 0.8247383778991484, 'eval_Recall': 0.7833793596443682, 'eval_F1-score': 0.8022087198646425, 'eval_runtime': 3.3207, 'eval_samples_per_second': 678.777, 'eval_steps_per_second': 5.421}

🔍 Evaluating model: to3uvb1h/checkpoint-1000


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for to3uvb1h/checkpoint-1000: {'eval_mcc_metric': 0.7526312450898183, 'eval_loss': 0.15572887659072876, 'eval_Accuracy': 0.8717834960070985, 'eval_AUC-ROC': 0.9603426928577126, 'eval_Precision': 0.7568937122921888, 'eval_Recall': 0.6577760954882745, 'eval_F1-score': 0.6888927540741175, 'eval_runtime': 2.9924, 'eval_samples_per_second': 753.238, 'eval_steps_per_second': 6.015}

🔍 Evaluating model: to3uvb1h/checkpoint-500


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for to3uvb1h/checkpoint-500: {'eval_mcc_metric': 0.7433824366848646, 'eval_loss': 0.18622320890426636, 'eval_Accuracy': 0.8624667258207631, 'eval_AUC-ROC': 0.9491061139912048, 'eval_Precision': 0.6958470209767611, 'eval_Recall': 0.6681240824198336, 'eval_F1-score': 0.6759382185055587, 'eval_runtime': 2.9908, 'eval_samples_per_second': 753.656, 'eval_steps_per_second': 6.019}

🔍 Evaluating model: to3uvb1h/checkpoint-1645


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for to3uvb1h/checkpoint-1645: {'eval_mcc_metric': 0.7893589818962506, 'eval_loss': 0.14277541637420654, 'eval_Accuracy': 0.888642413487134, 'eval_AUC-ROC': 0.970534896759912, 'eval_Precision': 0.8309968335234739, 'eval_Recall': 0.7920133444927788, 'eval_F1-score': 0.8095269496517847, 'eval_runtime': 3.3192, 'eval_samples_per_second': 679.087, 'eval_steps_per_second': 5.423}

🔍 Evaluating model: oggmqthu/checkpoint-1500


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for oggmqthu/checkpoint-1500: {'eval_mcc_metric': 0.7646476032969329, 'eval_loss': 0.15173858404159546, 'eval_Accuracy': 0.8748890860692103, 'eval_AUC-ROC': 0.9539073098699481, 'eval_Precision': 0.7699535609880498, 'eval_Recall': 0.7440526568986802, 'eval_F1-score': 0.7558138359370141, 'eval_runtime': 2.9863, 'eval_samples_per_second': 754.787, 'eval_steps_per_second': 6.028}

🔍 Evaluating model: oggmqthu/checkpoint-1000


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for oggmqthu/checkpoint-1000: {'eval_mcc_metric': 0.7385925481100566, 'eval_loss': 0.17853213846683502, 'eval_Accuracy': 0.8620230700976043, 'eval_AUC-ROC': 0.952035520418581, 'eval_Precision': 0.738291124479222, 'eval_Recall': 0.6535935188237787, 'eval_F1-score': 0.677905482451869, 'eval_runtime': 2.984, 'eval_samples_per_second': 755.37, 'eval_steps_per_second': 6.032}

🔍 Evaluating model: oggmqthu/checkpoint-500


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for oggmqthu/checkpoint-500: {'eval_mcc_metric': 0.7426713014278017, 'eval_loss': 0.20437933504581451, 'eval_Accuracy': 0.8615794143744454, 'eval_AUC-ROC': 0.944137863291773, 'eval_Precision': 0.7301470060169304, 'eval_Recall': 0.669373528503963, 'eval_F1-score': 0.6826231496184025, 'eval_runtime': 3.3044, 'eval_samples_per_second': 682.111, 'eval_steps_per_second': 5.447}

🔍 Evaluating model: oggmqthu/checkpoint-1645


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for oggmqthu/checkpoint-1645: {'eval_mcc_metric': 0.7667172633940723, 'eval_loss': 0.14829595386981964, 'eval_Accuracy': 0.8757763975155279, 'eval_AUC-ROC': 0.9531141891298681, 'eval_Precision': 0.7546311912638144, 'eval_Recall': 0.746212206833805, 'eval_F1-score': 0.7496316603246458, 'eval_runtime': 2.9929, 'eval_samples_per_second': 753.106, 'eval_steps_per_second': 6.014}

🔍 Evaluating model: vx2zkfhq/checkpoint-500


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for vx2zkfhq/checkpoint-500: {'eval_mcc_metric': 0.7460191561959748, 'eval_loss': 0.18280988931655884, 'eval_Accuracy': 0.8646850044365573, 'eval_AUC-ROC': 0.9526404638451791, 'eval_Precision': 0.6994605300786482, 'eval_Recall': 0.666295517711146, 'eval_F1-score': 0.6761989540170544, 'eval_runtime': 2.9386, 'eval_samples_per_second': 767.039, 'eval_steps_per_second': 6.125}

🔍 Evaluating model: tl30zdlb/checkpoint-1000


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for tl30zdlb/checkpoint-1000: {'eval_mcc_metric': 0.7353863792389076, 'eval_loss': 0.15578249096870422, 'eval_Accuracy': 0.8637976929902396, 'eval_AUC-ROC': 0.9687349144078918, 'eval_Precision': 0.8553351091006247, 'eval_Recall': 0.6467957836508158, 'eval_F1-score': 0.6888694618672135, 'eval_runtime': 2.9402, 'eval_samples_per_second': 766.625, 'eval_steps_per_second': 6.122}

🔍 Evaluating model: tl30zdlb/checkpoint-500


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for tl30zdlb/checkpoint-500: {'eval_mcc_metric': 0.7628025408739891, 'eval_loss': 0.17303045094013214, 'eval_Accuracy': 0.8708961845607809, 'eval_AUC-ROC': 0.9574226925519712, 'eval_Precision': 0.6897947167167306, 'eval_Recall': 0.6845226184087301, 'eval_F1-score': 0.6832200096471022, 'eval_runtime': 2.9382, 'eval_samples_per_second': 767.123, 'eval_steps_per_second': 6.126}

🔍 Evaluating model: t6motadp/checkpoint-1500


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for t6motadp/checkpoint-1500: {'eval_mcc_metric': 0.776442822237734, 'eval_loss': 0.14893807470798492, 'eval_Accuracy': 0.8819875776397516, 'eval_AUC-ROC': 0.9697350251900344, 'eval_Precision': 0.7593816467773177, 'eval_Recall': 0.6838207946505889, 'eval_F1-score': 0.7042818114602781, 'eval_runtime': 2.9415, 'eval_samples_per_second': 766.275, 'eval_steps_per_second': 6.119}

🔍 Evaluating model: t6motadp/checkpoint-1000


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for t6motadp/checkpoint-1000: {'eval_mcc_metric': 0.7434281295722228, 'eval_loss': 0.16649644076824188, 'eval_Accuracy': 0.8669032830523514, 'eval_AUC-ROC': 0.9327607881911548, 'eval_Precision': 0.7533437807611343, 'eval_Recall': 0.6527979285484278, 'eval_F1-score': 0.6835916064247971, 'eval_runtime': 2.9347, 'eval_samples_per_second': 768.064, 'eval_steps_per_second': 6.134}

🔍 Evaluating model: t6motadp/checkpoint-500


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for t6motadp/checkpoint-500: {'eval_mcc_metric': 0.7212500930895972, 'eval_loss': 0.22483740746974945, 'eval_Accuracy': 0.8527062999112689, 'eval_AUC-ROC': 0.9137968485924197, 'eval_Precision': 0.6934293181375896, 'eval_Recall': 0.6475057130933363, 'eval_F1-score': 0.6638213090494984, 'eval_runtime': 2.9338, 'eval_samples_per_second': 768.275, 'eval_steps_per_second': 6.135}

🔍 Evaluating model: t6motadp/checkpoint-1645


📌 Test Results for t6motadp/checkpoint-1645: {'eval_mcc_metric': 0.7816993453854211, 'eval_loss': 0.14186696708202362, 'eval_Accuracy': 0.8846495119787046, 'eval_AUC-ROC': 0.9697139680969613, 'eval_Precision': 0.7635851304227466, 'eval_Recall': 0.686487387957943, 'eval_F1-score': 0.7073148113378649, 'eval_runtime': 2.9284, 'eval_samples_per_second': 769.709, 'eval_steps_per_second': 6.147}
Valid Model Checkpoints: ['./models_MTR100_Chemberta/glzr8jim', './models_MTR100_Chemberta/y6ioij00', './models_MTR100_Chemberta/imo6nm9x', './models_MTR100_Chemberta/to3uvb1h', './models_MTR100_Chemberta/oggmqthu']
Evaluating model: ./models_MTR100_Chemberta/glzr8jim


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_MTR100_Chemberta/glzr8jim: {'eval_mcc_metric': 0.7857732041076384, 'eval_loss': 0.13933634757995605, 'eval_Accuracy': 0.8864241348713399, 'eval_AUC-ROC': 0.9560709660479298, 'eval_Precision': 0.80451841570831, 'eval_Recall': 0.7921601853640271, 'eval_F1-score': 0.7971128505954014, 'eval_runtime': 2.9807, 'eval_samples_per_second': 756.188, 'eval_steps_per_second': 6.039}
Evaluating model: ./models_MTR100_Chemberta/y6ioij00


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_MTR100_Chemberta/y6ioij00: {'eval_mcc_metric': 0.7777237717003995, 'eval_loss': 0.13908487558364868, 'eval_Accuracy': 0.8824312333629104, 'eval_AUC-ROC': 0.9680355972198766, 'eval_Precision': 0.8615301215900842, 'eval_Recall': 0.755471099116446, 'eval_F1-score': 0.7915827963618229, 'eval_runtime': 3.3141, 'eval_samples_per_second': 680.133, 'eval_steps_per_second': 5.431}
Evaluating model: ./models_MTR100_Chemberta/imo6nm9x


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_MTR100_Chemberta/imo6nm9x: {'eval_mcc_metric': 0.7818169758422309, 'eval_loss': 0.1437530368566513, 'eval_Accuracy': 0.8850931677018633, 'eval_AUC-ROC': 0.9721605705286832, 'eval_Precision': 0.8250201990356656, 'eval_Recall': 0.7875592578624443, 'eval_F1-score': 0.8045247463831717, 'eval_runtime': 2.9361, 'eval_samples_per_second': 767.688, 'eval_steps_per_second': 6.131}
Evaluating model: ./models_MTR100_Chemberta/to3uvb1h


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_MTR100_Chemberta/to3uvb1h: {'eval_mcc_metric': 0.777782885687969, 'eval_loss': 0.1434350311756134, 'eval_Accuracy': 0.883318544809228, 'eval_AUC-ROC': 0.9714154098726826, 'eval_Precision': 0.8247383778991484, 'eval_Recall': 0.7833793596443682, 'eval_F1-score': 0.8022087198646425, 'eval_runtime': 2.9953, 'eval_samples_per_second': 752.507, 'eval_steps_per_second': 6.009}
Evaluating model: ./models_MTR100_Chemberta/oggmqthu


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_MTR100_Chemberta/oggmqthu: {'eval_mcc_metric': 0.7667172633940723, 'eval_loss': 0.14829595386981964, 'eval_Accuracy': 0.8757763975155279, 'eval_AUC-ROC': 0.9531141891298681, 'eval_Precision': 0.7546311912638144, 'eval_Recall': 0.746212206833805, 'eval_F1-score': 0.7496316603246458, 'eval_runtime': 3.3239, 'eval_samples_per_second': 678.117, 'eval_steps_per_second': 5.415}


## 77M MLM Model

In [22]:
base_model = AutoModelForSequenceClassification.from_pretrained(
    "DeepChem/ChemBERTa-77M-MLM",
    num_labels=5,
    problem_type="single_label_classification",    
    trust_remote_code=True,
    
)

tokenizer = AutoTokenizer.from_pretrained("DeepChem/ChemBERTa-77M-MLM",trust_remote_code=True)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [36]:
training_args = TrainingArguments(
    output_dir="./test_results_flavor",
    per_device_eval_batch_size=32,
    report_to="none",  # Disable logging to W&B for test
    

)


In [None]:
import os
from peft import PeftModel

models_dir = "./models_BBBP_chemberta_focal_loss"

def find_all_checkpoints(base_dir):
    all_checkpoints = []
    for folder in os.listdir(base_dir):
        folder_path = os.path.join(base_dir, folder)
        if os.path.isdir(folder_path):
            for subfolder in os.listdir(folder_path):
                subfolder_path = os.path.join(folder_path, subfolder)
                if os.path.isdir(subfolder_path) and subfolder.startswith("checkpoint-"):
                    if os.path.exists(os.path.join(subfolder_path, "adapter_config.json")):
                        all_checkpoints.append(subfolder_path)
    return all_checkpoints

valid_checkpoints = find_all_checkpoints(models_dir)
print("🧠 Valid nested checkpoints found:", valid_checkpoints)

for checkpoint_path in valid_checkpoints:
    checkpoint_name = os.path.basename(checkpoint_path)
    parent_folder = os.path.basename(os.path.dirname(checkpoint_path))

    print(f"\n🔍 Evaluating model: {parent_folder}/{checkpoint_name}")

    adapter_model = PeftModel.from_pretrained(base_model, checkpoint_path)
    adapter_model.eval()

    trainer = CustomTrainer(
        model=adapter_model,
        args=training_args,
        eval_dataset=test_dataset,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics
    )

    test_results_clin = trainer.evaluate()

    auc_score = test_results_clin.get("eval/AUC_ROC", 0)
    if auc_score > 0.95:
        print(f"✅ AUC_ROC > 0.95 for {parent_folder}/{checkpoint_name}")
        print(f"📌 Test Results: {test_results_clin}")
    else:
        print(f"❌ Skipping {parent_folder}/{checkpoint_name} (AUC_ROC = {auc_score:.4f})")


🧠 Valid nested checkpoints found: ['./models_Mlm_100_Chemberta/xutmnj12/checkpoint-1500', './models_Mlm_100_Chemberta/xutmnj12/checkpoint-1000', './models_Mlm_100_Chemberta/xutmnj12/checkpoint-500', './models_Mlm_100_Chemberta/xutmnj12/checkpoint-1645', './models_Mlm_100_Chemberta/sz247wpr/checkpoint-658', './models_Mlm_100_Chemberta/sz247wpr/checkpoint-500', './models_Mlm_100_Chemberta/ikd7q7fk/checkpoint-658', './models_Mlm_100_Chemberta/ikd7q7fk/checkpoint-500', './models_Mlm_100_Chemberta/kovlgv1s/checkpoint-1500', './models_Mlm_100_Chemberta/kovlgv1s/checkpoint-1000', './models_Mlm_100_Chemberta/kovlgv1s/checkpoint-500', './models_Mlm_100_Chemberta/kovlgv1s/checkpoint-1645', './models_Mlm_100_Chemberta/k0sfrn0p/checkpoint-1500', './models_Mlm_100_Chemberta/k0sfrn0p/checkpoint-1000', './models_Mlm_100_Chemberta/k0sfrn0p/checkpoint-500', './models_Mlm_100_Chemberta/k0sfrn0p/checkpoint-1645', './models_Mlm_100_Chemberta/0yvjid5t/checkpoint-658', './models_Mlm_100_Chemberta/0yvjid5t/c

  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for xutmnj12/checkpoint-1500: {'eval_mcc_metric': 0.7515927803916811, 'eval_loss': 0.19172461330890656, 'eval_Accuracy': 0.8700088731144632, 'eval_AUC-ROC': 0.9528106082436698, 'eval_Precision': 0.7540780668366062, 'eval_Recall': 0.726382307268234, 'eval_F1-score': 0.7377056675422857, 'eval_runtime': 3.1689, 'eval_samples_per_second': 711.282, 'eval_steps_per_second': 5.68}

🔍 Evaluating model: xutmnj12/checkpoint-1000


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for xutmnj12/checkpoint-1000: {'eval_mcc_metric': 0.7381556177411126, 'eval_loss': 0.20143990218639374, 'eval_Accuracy': 0.8655723158828749, 'eval_AUC-ROC': 0.9462037248705334, 'eval_Precision': 0.8562507949004768, 'eval_Recall': 0.6418617688488828, 'eval_F1-score': 0.6859070010245756, 'eval_runtime': 2.9426, 'eval_samples_per_second': 765.981, 'eval_steps_per_second': 6.117}

🔍 Evaluating model: xutmnj12/checkpoint-500


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for xutmnj12/checkpoint-500: {'eval_mcc_metric': 0.7338536537715541, 'eval_loss': 0.20480243861675262, 'eval_Accuracy': 0.8584738243123337, 'eval_AUC-ROC': 0.9371736676552583, 'eval_Precision': 0.7093752379429545, 'eval_Recall': 0.7840665209955991, 'eval_F1-score': 0.7235717842777554, 'eval_runtime': 3.1783, 'eval_samples_per_second': 709.176, 'eval_steps_per_second': 5.663}

🔍 Evaluating model: xutmnj12/checkpoint-1645


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for xutmnj12/checkpoint-1645: {'eval_mcc_metric': 0.7521464915364129, 'eval_loss': 0.19328723847866058, 'eval_Accuracy': 0.870452528837622, 'eval_AUC-ROC': 0.9531784124231235, 'eval_Precision': 0.7572555794128578, 'eval_Recall': 0.7250657272294483, 'eval_F1-score': 0.7380485089709764, 'eval_runtime': 3.1824, 'eval_samples_per_second': 708.267, 'eval_steps_per_second': 5.656}

🔍 Evaluating model: sz247wpr/checkpoint-658


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for sz247wpr/checkpoint-658: {'eval_mcc_metric': 0.7233006450863193, 'eval_loss': 0.19794334471225739, 'eval_Accuracy': 0.8553682342502218, 'eval_AUC-ROC': 0.9324020126587597, 'eval_Precision': 0.8399824452915114, 'eval_Recall': 0.6400624603647049, 'eval_F1-score': 0.6758858845455952, 'eval_runtime': 2.9396, 'eval_samples_per_second': 766.783, 'eval_steps_per_second': 6.123}

🔍 Evaluating model: sz247wpr/checkpoint-500


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for sz247wpr/checkpoint-500: {'eval_mcc_metric': 0.72430838982655, 'eval_loss': 0.2104184478521347, 'eval_Accuracy': 0.854924578527063, 'eval_AUC-ROC': 0.910169070347604, 'eval_Precision': 0.8260430948474914, 'eval_Recall': 0.6436679844119826, 'eval_F1-score': 0.6732625055831842, 'eval_runtime': 3.1797, 'eval_samples_per_second': 708.88, 'eval_steps_per_second': 5.661}

🔍 Evaluating model: ikd7q7fk/checkpoint-658


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for ikd7q7fk/checkpoint-658: {'eval_mcc_metric': 0.7419793246099474, 'eval_loss': 0.19249001145362854, 'eval_Accuracy': 0.865128660159716, 'eval_AUC-ROC': 0.9296432847797422, 'eval_Precision': 0.852755204025884, 'eval_Recall': 0.6866640621349751, 'eval_F1-score': 0.7300430956971153, 'eval_runtime': 3.1562, 'eval_samples_per_second': 714.16, 'eval_steps_per_second': 5.703}

🔍 Evaluating model: ikd7q7fk/checkpoint-500


📌 Test Results for ikd7q7fk/checkpoint-500: {'eval_mcc_metric': 0.7409555692039872, 'eval_loss': 0.19144411385059357, 'eval_Accuracy': 0.8646850044365573, 'eval_AUC-ROC': 0.9266973256544127, 'eval_Precision': 0.8465550895920702, 'eval_Recall': 0.688897911223715, 'eval_F1-score': 0.7312272209981807, 'eval_runtime': 2.9172, 'eval_samples_per_second': 772.668, 'eval_steps_per_second': 6.17}

🔍 Evaluating model: kovlgv1s/checkpoint-1500


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for kovlgv1s/checkpoint-1500: {'eval_mcc_metric': 0.7342137564076765, 'eval_loss': 0.19446489214897156, 'eval_Accuracy': 0.860248447204969, 'eval_AUC-ROC': 0.9519202263025145, 'eval_Precision': 0.6878685077459297, 'eval_Recall': 0.7149259559251673, 'eval_F1-score': 0.6903306505006462, 'eval_runtime': 3.2187, 'eval_samples_per_second': 700.279, 'eval_steps_per_second': 5.592}

🔍 Evaluating model: kovlgv1s/checkpoint-1000


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for kovlgv1s/checkpoint-1000: {'eval_mcc_metric': 0.7084513048537422, 'eval_loss': 0.20086570084095, 'eval_Accuracy': 0.8513753327417923, 'eval_AUC-ROC': 0.9494769024472159, 'eval_Precision': 0.7947958503476438, 'eval_Recall': 0.6820920079787726, 'eval_F1-score': 0.7249915354431786, 'eval_runtime': 2.9872, 'eval_samples_per_second': 754.547, 'eval_steps_per_second': 6.026}

🔍 Evaluating model: kovlgv1s/checkpoint-500


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for kovlgv1s/checkpoint-500: {'eval_mcc_metric': 0.7423314990336061, 'eval_loss': 0.19883593916893005, 'eval_Accuracy': 0.8633540372670807, 'eval_AUC-ROC': 0.9242379470524774, 'eval_Precision': 0.6957648110549656, 'eval_Recall': 0.6932927716767097, 'eval_F1-score': 0.6908755589345399, 'eval_runtime': 3.223, 'eval_samples_per_second': 699.357, 'eval_steps_per_second': 5.585}

🔍 Evaluating model: kovlgv1s/checkpoint-1645


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for kovlgv1s/checkpoint-1645: {'eval_mcc_metric': 0.7320377251817916, 'eval_loss': 0.1969209909439087, 'eval_Accuracy': 0.8598047914818101, 'eval_AUC-ROC': 0.9525929438784001, 'eval_Precision': 0.6922687690070904, 'eval_Recall': 0.7125012345914886, 'eval_F1-score': 0.6906020488842198, 'eval_runtime': 2.985, 'eval_samples_per_second': 755.103, 'eval_steps_per_second': 6.03}

🔍 Evaluating model: k0sfrn0p/checkpoint-1500


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for k0sfrn0p/checkpoint-1500: {'eval_mcc_metric': 0.745377240742711, 'eval_loss': 0.18867076933383942, 'eval_Accuracy': 0.8673469387755102, 'eval_AUC-ROC': 0.9240643387138823, 'eval_Precision': 0.7169286661753869, 'eval_Recall': 0.6532359392276617, 'eval_F1-score': 0.6757555231363336, 'eval_runtime': 3.2263, 'eval_samples_per_second': 698.629, 'eval_steps_per_second': 5.579}

🔍 Evaluating model: k0sfrn0p/checkpoint-1000


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


📌 Test Results for k0sfrn0p/checkpoint-1000: {'eval_mcc_metric': 0.6296497063594445, 'eval_loss': 0.22113291919231415, 'eval_Accuracy': 0.8149955634427685, 'eval_AUC-ROC': 0.874255501709448, 'eval_Precision': 0.6065668467631984, 'eval_Recall': 0.518895796382332, 'eval_F1-score': 0.540227755940425, 'eval_runtime': 2.9818, 'eval_samples_per_second': 755.921, 'eval_steps_per_second': 6.037}

🔍 Evaluating model: k0sfrn0p/checkpoint-500


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for k0sfrn0p/checkpoint-500: {'eval_mcc_metric': 0.7065710674113834, 'eval_loss': 0.2449447214603424, 'eval_Accuracy': 0.8438331854480923, 'eval_AUC-ROC': 0.88074611426543, 'eval_Precision': 0.7243193756222379, 'eval_Recall': 0.6292032897665498, 'eval_F1-score': 0.6424112873123978, 'eval_runtime': 3.2235, 'eval_samples_per_second': 699.25, 'eval_steps_per_second': 5.584}

🔍 Evaluating model: k0sfrn0p/checkpoint-1645


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for k0sfrn0p/checkpoint-1645: {'eval_mcc_metric': 0.7538564530911973, 'eval_loss': 0.1873597800731659, 'eval_Accuracy': 0.8695652173913043, 'eval_AUC-ROC': 0.9243576636357957, 'eval_Precision': 0.8489141840021978, 'eval_Recall': 0.6637955022600265, 'eval_F1-score': 0.6929673332064148, 'eval_runtime': 2.9879, 'eval_samples_per_second': 754.382, 'eval_steps_per_second': 6.024}

🔍 Evaluating model: 0yvjid5t/checkpoint-658


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for 0yvjid5t/checkpoint-658: {'eval_mcc_metric': 0.7327720416734685, 'eval_loss': 0.20283129811286926, 'eval_Accuracy': 0.8606921029281278, 'eval_AUC-ROC': 0.9453031939096954, 'eval_Precision': 0.8439377639193936, 'eval_Recall': 0.6475590019460279, 'eval_F1-score': 0.6836970646724254, 'eval_runtime': 2.9227, 'eval_samples_per_second': 771.212, 'eval_steps_per_second': 6.159}

🔍 Evaluating model: 0yvjid5t/checkpoint-500


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for 0yvjid5t/checkpoint-500: {'eval_mcc_metric': 0.7316784674933826, 'eval_loss': 0.20233118534088135, 'eval_Accuracy': 0.8589174800354925, 'eval_AUC-ROC': 0.9223233605218709, 'eval_Precision': 0.6996524483783776, 'eval_Recall': 0.6491223952561614, 'eval_F1-score': 0.6678992357486079, 'eval_runtime': 3.1526, 'eval_samples_per_second': 714.957, 'eval_steps_per_second': 5.71}

🔍 Evaluating model: d1wrlvdu/checkpoint-500


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


📌 Test Results for d1wrlvdu/checkpoint-500: {'eval_mcc_metric': 0.694985160868272, 'eval_loss': 0.25309643149375916, 'eval_Accuracy': 0.8389529724933452, 'eval_AUC-ROC': 0.8535039425802043, 'eval_Precision': 0.6059986950156162, 'eval_Recall': 0.5882115468648133, 'eval_F1-score': 0.5930071107005246, 'eval_runtime': 2.9107, 'eval_samples_per_second': 774.379, 'eval_steps_per_second': 6.184}

🔍 Evaluating model: d6a28ks6/checkpoint-658


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for d6a28ks6/checkpoint-658: {'eval_mcc_metric': 0.7219500064808291, 'eval_loss': 0.20311683416366577, 'eval_Accuracy': 0.8558118899733806, 'eval_AUC-ROC': 0.9445070946033788, 'eval_Precision': 0.8383799440884262, 'eval_Recall': 0.638232313462664, 'eval_F1-score': 0.6765530437087939, 'eval_runtime': 2.9839, 'eval_samples_per_second': 755.397, 'eval_steps_per_second': 6.032}

🔍 Evaluating model: d6a28ks6/checkpoint-500


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for d6a28ks6/checkpoint-500: {'eval_mcc_metric': 0.7142658195288292, 'eval_loss': 0.2040163278579712, 'eval_Accuracy': 0.8504880212954747, 'eval_AUC-ROC': 0.9447150623847612, 'eval_Precision': 0.6930504488037323, 'eval_Recall': 0.6671395409479339, 'eval_F1-score': 0.6786870690645163, 'eval_runtime': 2.9781, 'eval_samples_per_second': 756.853, 'eval_steps_per_second': 6.044}

🔍 Evaluating model: 3fdjf7s7/checkpoint-658


📌 Test Results for 3fdjf7s7/checkpoint-658: {'eval_mcc_metric': 0.7165065043347795, 'eval_loss': 0.2090887576341629, 'eval_Accuracy': 0.8513753327417923, 'eval_AUC-ROC': 0.9391260199982971, 'eval_Precision': 0.7388555397131931, 'eval_Recall': 0.6339252090059346, 'eval_F1-score': 0.6635488189383238, 'eval_runtime': 3.1771, 'eval_samples_per_second': 709.442, 'eval_steps_per_second': 5.665}

🔍 Evaluating model: 3fdjf7s7/checkpoint-500


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for 3fdjf7s7/checkpoint-500: {'eval_mcc_metric': 0.7278679315371784, 'eval_loss': 0.20962630212306976, 'eval_Accuracy': 0.8553682342502218, 'eval_AUC-ROC': 0.9297219471707138, 'eval_Precision': 0.6818073034918186, 'eval_Recall': 0.6461433161519332, 'eval_F1-score': 0.6592594070258537, 'eval_runtime': 2.9439, 'eval_samples_per_second': 765.64, 'eval_steps_per_second': 6.114}

🔍 Evaluating model: 6a8xs6i0/checkpoint-500


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for 6a8xs6i0/checkpoint-500: {'eval_mcc_metric': 0.742203637990562, 'eval_loss': 0.226716086268425, 'eval_Accuracy': 0.8606921029281278, 'eval_AUC-ROC': 0.9024235768169208, 'eval_Precision': 0.6410628327425308, 'eval_Recall': 0.624204679684316, 'eval_F1-score': 0.6253995330432411, 'eval_runtime': 3.2279, 'eval_samples_per_second': 698.282, 'eval_steps_per_second': 5.576}

🔍 Evaluating model: a99qvoss/checkpoint-658


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for a99qvoss/checkpoint-658: {'eval_mcc_metric': 0.733985401320564, 'eval_loss': 0.19938620924949646, 'eval_Accuracy': 0.8606921029281278, 'eval_AUC-ROC': 0.9334784404298844, 'eval_Precision': 0.8489034919817877, 'eval_Recall': 0.6464780594862665, 'eval_F1-score': 0.6796664281542979, 'eval_runtime': 3.1519, 'eval_samples_per_second': 715.135, 'eval_steps_per_second': 5.711}

🔍 Evaluating model: a99qvoss/checkpoint-500


📌 Test Results for a99qvoss/checkpoint-500: {'eval_mcc_metric': 0.7353105575740809, 'eval_loss': 0.19767917692661285, 'eval_Accuracy': 0.8615794143744454, 'eval_AUC-ROC': 0.9288132989607399, 'eval_Precision': 0.848757178461699, 'eval_Recall': 0.6479163943793445, 'eval_F1-score': 0.6816673651939139, 'eval_runtime': 2.9138, 'eval_samples_per_second': 773.558, 'eval_steps_per_second': 6.177}

🔍 Evaluating model: q6ab3hnx/checkpoint-658


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for q6ab3hnx/checkpoint-658: {'eval_mcc_metric': 0.7463880380697544, 'eval_loss': 0.1934467852115631, 'eval_Accuracy': 0.8673469387755102, 'eval_AUC-ROC': 0.9471037509253183, 'eval_Precision': 0.8515282295264454, 'eval_Recall': 0.6568764134754077, 'eval_F1-score': 0.6912447962506661, 'eval_runtime': 3.1781, 'eval_samples_per_second': 709.232, 'eval_steps_per_second': 5.664}

🔍 Evaluating model: q6ab3hnx/checkpoint-500


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for q6ab3hnx/checkpoint-500: {'eval_mcc_metric': 0.7407640141242543, 'eval_loss': 0.19885306060314178, 'eval_Accuracy': 0.862910381543922, 'eval_AUC-ROC': 0.932307721697087, 'eval_Precision': 0.8362694945279083, 'eval_Recall': 0.6581162563794549, 'eval_F1-score': 0.6866106391759275, 'eval_runtime': 3.181, 'eval_samples_per_second': 708.593, 'eval_steps_per_second': 5.659}

🔍 Evaluating model: jbxogsm2/checkpoint-1500


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for jbxogsm2/checkpoint-1500: {'eval_mcc_metric': 0.7366823570456721, 'eval_loss': 0.19648517668247223, 'eval_Accuracy': 0.8620230700976043, 'eval_AUC-ROC': 0.944459296784161, 'eval_Precision': 0.7088419652290514, 'eval_Recall': 0.683373528264271, 'eval_F1-score': 0.6940615523780366, 'eval_runtime': 2.926, 'eval_samples_per_second': 770.327, 'eval_steps_per_second': 6.152}

🔍 Evaluating model: jbxogsm2/checkpoint-1000


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for jbxogsm2/checkpoint-1000: {'eval_mcc_metric': 0.7249778769328371, 'eval_loss': 0.20493312180042267, 'eval_Accuracy': 0.8589174800354925, 'eval_AUC-ROC': 0.9430401702420589, 'eval_Precision': 0.7862552697219171, 'eval_Recall': 0.6689705797043175, 'eval_F1-score': 0.7130367229243604, 'eval_runtime': 3.1598, 'eval_samples_per_second': 713.331, 'eval_steps_per_second': 5.697}

🔍 Evaluating model: jbxogsm2/checkpoint-500


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for jbxogsm2/checkpoint-500: {'eval_mcc_metric': 0.7455775650378911, 'eval_loss': 0.20152515172958374, 'eval_Accuracy': 0.8646850044365573, 'eval_AUC-ROC': 0.9270770721930675, 'eval_Precision': 0.6816616896853674, 'eval_Recall': 0.694608822153824, 'eval_F1-score': 0.6822061180937639, 'eval_runtime': 2.9205, 'eval_samples_per_second': 771.79, 'eval_steps_per_second': 6.163}

🔍 Evaluating model: gi5kecmi/checkpoint-658


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for gi5kecmi/checkpoint-658: {'eval_mcc_metric': 0.7416033240064804, 'eval_loss': 0.18928112089633942, 'eval_Accuracy': 0.8646850044365573, 'eval_AUC-ROC': 0.9494673914241305, 'eval_Precision': 0.7175521682937753, 'eval_Recall': 0.6539430110345903, 'eval_F1-score': 0.6754301456803111, 'eval_runtime': 3.165, 'eval_samples_per_second': 712.16, 'eval_steps_per_second': 5.687}

🔍 Evaluating model: gi5kecmi/checkpoint-500


📌 Test Results for gi5kecmi/checkpoint-500: {'eval_mcc_metric': 0.739163857433399, 'eval_loss': 0.19696369767189026, 'eval_Accuracy': 0.8620230700976043, 'eval_AUC-ROC': 0.9272159007751398, 'eval_Precision': 0.6702897839037956, 'eval_Recall': 0.6566129634462633, 'eval_F1-score': 0.6619341230160769, 'eval_runtime': 2.915, 'eval_samples_per_second': 773.249, 'eval_steps_per_second': 6.175}

🔍 Evaluating model: u9d6q2lo/checkpoint-658


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  trainer = CustomTrainer(


📌 Test Results for u9d6q2lo/checkpoint-658: {'eval_mcc_metric': 0.724672108248464, 'eval_loss': 0.19758668541908264, 'eval_Accuracy': 0.8562555456965395, 'eval_AUC-ROC': 0.9213624695017598, 'eval_Precision': 0.6312109435891815, 'eval_Recall': 0.605718593996085, 'eval_F1-score': 0.6136877227016605, 'eval_runtime': 3.22, 'eval_samples_per_second': 699.996, 'eval_steps_per_second': 5.59}

🔍 Evaluating model: u9d6q2lo/checkpoint-500


No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for u9d6q2lo/checkpoint-500: {'eval_mcc_metric': 0.7167517251598591, 'eval_loss': 0.2149065136909485, 'eval_Accuracy': 0.8504880212954747, 'eval_AUC-ROC': 0.9020879677136187, 'eval_Precision': 0.722033614061327, 'eval_Recall': 0.6381268330734974, 'eval_F1-score': 0.658734379036996, 'eval_runtime': 2.989, 'eval_samples_per_second': 754.105, 'eval_steps_per_second': 6.022}

🔍 Evaluating model: wt5mgigm/checkpoint-1500


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for wt5mgigm/checkpoint-1500: {'eval_mcc_metric': 0.7556197394027053, 'eval_loss': 0.18587109446525574, 'eval_Accuracy': 0.8708961845607809, 'eval_AUC-ROC': 0.9508821803277767, 'eval_Precision': 0.724016556454012, 'eval_Recall': 0.6982829700966494, 'eval_F1-score': 0.7098701024520846, 'eval_runtime': 3.2207, 'eval_samples_per_second': 699.839, 'eval_steps_per_second': 5.589}

🔍 Evaluating model: wt5mgigm/checkpoint-1000


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for wt5mgigm/checkpoint-1000: {'eval_mcc_metric': 0.7148359404765081, 'eval_loss': 0.20748455822467804, 'eval_Accuracy': 0.8482697426796806, 'eval_AUC-ROC': 0.927417529009593, 'eval_Precision': 0.6617804071296437, 'eval_Recall': 0.6541302066798323, 'eval_F1-score': 0.6575570881276073, 'eval_runtime': 2.9865, 'eval_samples_per_second': 754.731, 'eval_steps_per_second': 6.027}

🔍 Evaluating model: wt5mgigm/checkpoint-500


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for wt5mgigm/checkpoint-500: {'eval_mcc_metric': 0.7081470860470594, 'eval_loss': 0.21620403230190277, 'eval_Accuracy': 0.8442768411712511, 'eval_AUC-ROC': 0.9315793239021748, 'eval_Precision': 0.8126900435741338, 'eval_Recall': 0.6440294612920517, 'eval_F1-score': 0.6673688901517647, 'eval_runtime': 2.9814, 'eval_samples_per_second': 756.024, 'eval_steps_per_second': 6.037}

🔍 Evaluating model: wt5mgigm/checkpoint-1645


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for wt5mgigm/checkpoint-1645: {'eval_mcc_metric': 0.7612697330008528, 'eval_loss': 0.1838589459657669, 'eval_Accuracy': 0.8717834960070985, 'eval_AUC-ROC': 0.9468502303345275, 'eval_Precision': 0.6952379489019463, 'eval_Recall': 0.7114263953722755, 'eval_F1-score': 0.7013062277732849, 'eval_runtime': 3.2209, 'eval_samples_per_second': 699.803, 'eval_steps_per_second': 5.588}

🔍 Evaluating model: 532smrg2/checkpoint-1000


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for 532smrg2/checkpoint-1000: {'eval_mcc_metric': 0.736670685430862, 'eval_loss': 0.20298810303211212, 'eval_Accuracy': 0.862910381543922, 'eval_AUC-ROC': 0.9488226028013358, 'eval_Precision': 0.7658943134071283, 'eval_Recall': 0.7140026853910473, 'eval_F1-score': 0.7361749798036902, 'eval_runtime': 2.9784, 'eval_samples_per_second': 756.771, 'eval_steps_per_second': 6.043}

🔍 Evaluating model: 532smrg2/checkpoint-500


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for 532smrg2/checkpoint-500: {'eval_mcc_metric': 0.734268880398551, 'eval_loss': 0.20883527398109436, 'eval_Accuracy': 0.8589174800354925, 'eval_AUC-ROC': 0.9440477414069232, 'eval_Precision': 0.7009575272273703, 'eval_Recall': 0.7516299954479507, 'eval_F1-score': 0.7085234353197551, 'eval_runtime': 2.9781, 'eval_samples_per_second': 756.847, 'eval_steps_per_second': 6.044}

🔍 Evaluating model: gbyjoy6e/checkpoint-658


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📌 Test Results for gbyjoy6e/checkpoint-658: {'eval_mcc_metric': 0.7242799948767075, 'eval_loss': 0.2091398537158966, 'eval_Accuracy': 0.8553682342502218, 'eval_AUC-ROC': 0.9401006451281212, 'eval_Precision': 0.6406431158116671, 'eval_Recall': 0.6096868249240435, 'eval_F1-score': 0.6204827610222969, 'eval_runtime': 2.935, 'eval_samples_per_second': 767.966, 'eval_steps_per_second': 6.133}

🔍 Evaluating model: gbyjoy6e/checkpoint-500


📌 Test Results for gbyjoy6e/checkpoint-500: {'eval_mcc_metric': 0.7299963331063521, 'eval_loss': 0.2147645652294159, 'eval_Accuracy': 0.8553682342502218, 'eval_AUC-ROC': 0.8971433581892038, 'eval_Precision': 0.7331086934361826, 'eval_Recall': 0.6519809960114861, 'eval_F1-score': 0.6698237032430857, 'eval_runtime': 2.9354, 'eval_samples_per_second': 767.86, 'eval_steps_per_second': 6.132}


In [43]:
from peft import PeftModel  
import os

models_dir = "./models_Mlm_100_Chemberta"

for ckpt in os.listdir(models_dir):
    model_path = os.path.join(models_dir, ckpt)
    
    if os.path.isdir(model_path):
        adapter_config_path = os.path.join(model_path, "adapter_config.json")
        
        if not os.path.exists(adapter_config_path):
            print(f"⚠️ Missing 'adapter_config.json' in {model_path}")

# Evaluate each saved model

valid_checkpoints = [
    os.path.join(models_dir, ckpt)
    for ckpt in os.listdir(models_dir)
    if os.path.isdir(os.path.join(models_dir, ckpt)) and 
       os.path.exists(os.path.join(models_dir, ckpt, "adapter_config.json"))
]

print("Valid Model Checkpoints:", valid_checkpoints)

for model_path in valid_checkpoints:
    print(f"Evaluating model: {model_path}")
    checkpoint_name = os.path.basename(model_path)
    

    model_files = os.listdir(model_path)

     # Identify key model files (e.g., adapter weights)
    adapter_files = [f for f in model_files if f.endswith(".bin")]
    adapter_file_used = adapter_files[0] if adapter_files else "❌ No .bin file found"

    print(f"\n🔍 Evaluating model folder: {checkpoint_name}")
    print(f"📦 Adapter file being loaded: {adapter_file_used}")

    adapter_model = PeftModel.from_pretrained(base_model, model_path)
    adapter_model.eval()

    trainer = CustomTrainer(
        model=adapter_model,
        args=training_args,
        eval_dataset=test_dataset,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics
    )

    test_results_clin = trainer.evaluate()
    print(f"Test Results for {model_path}: {test_results_clin}")

  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Valid Model Checkpoints: ['./models_Mlm_100_Chemberta/xutmnj12', './models_Mlm_100_Chemberta/sz247wpr', './models_Mlm_100_Chemberta/ikd7q7fk', './models_Mlm_100_Chemberta/kovlgv1s', './models_Mlm_100_Chemberta/k0sfrn0p', './models_Mlm_100_Chemberta/0yvjid5t', './models_Mlm_100_Chemberta/d1wrlvdu', './models_Mlm_100_Chemberta/d6a28ks6', './models_Mlm_100_Chemberta/3fdjf7s7', './models_Mlm_100_Chemberta/6a8xs6i0', './models_Mlm_100_Chemberta/a99qvoss', './models_Mlm_100_Chemberta/q6ab3hnx', './models_Mlm_100_Chemberta/jbxogsm2', './models_Mlm_100_Chemberta/gi5kecmi', './models_Mlm_100_Chemberta/u9d6q2lo', './models_Mlm_100_Chemberta/wt5mgigm', './models_Mlm_100_Chemberta/532smrg2', './models_Mlm_100_Chemberta/gbyjoy6e']
Evaluating model: ./models_Mlm_100_Chemberta/xutmnj12

🔍 Evaluating model folder: xutmnj12
📦 Adapter file being loaded: training_args.bin


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mlm_100_Chemberta/xutmnj12: {'eval_mcc_metric': 0.7521464915364129, 'eval_loss': 0.19328723847866058, 'eval_Accuracy': 0.870452528837622, 'eval_AUC-ROC': 0.9531784124231235, 'eval_Precision': 0.7572555794128578, 'eval_Recall': 0.7250657272294483, 'eval_F1-score': 0.7380485089709764, 'eval_runtime': 2.939, 'eval_samples_per_second': 766.931, 'eval_steps_per_second': 6.125}
Evaluating model: ./models_Mlm_100_Chemberta/sz247wpr

🔍 Evaluating model folder: sz247wpr
📦 Adapter file being loaded: training_args.bin


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mlm_100_Chemberta/sz247wpr: {'eval_mcc_metric': 0.72430838982655, 'eval_loss': 0.2104184478521347, 'eval_Accuracy': 0.854924578527063, 'eval_AUC-ROC': 0.910169070347604, 'eval_Precision': 0.8260430948474914, 'eval_Recall': 0.6436679844119826, 'eval_F1-score': 0.6732625055831842, 'eval_runtime': 3.1773, 'eval_samples_per_second': 709.397, 'eval_steps_per_second': 5.665}
Evaluating model: ./models_Mlm_100_Chemberta/ikd7q7fk

🔍 Evaluating model folder: ikd7q7fk
📦 Adapter file being loaded: training_args.bin


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mlm_100_Chemberta/ikd7q7fk: {'eval_mcc_metric': 0.7409555692039872, 'eval_loss': 0.19144411385059357, 'eval_Accuracy': 0.8646850044365573, 'eval_AUC-ROC': 0.9266973256544127, 'eval_Precision': 0.8465550895920702, 'eval_Recall': 0.688897911223715, 'eval_F1-score': 0.7312272209981807, 'eval_runtime': 2.919, 'eval_samples_per_second': 772.191, 'eval_steps_per_second': 6.167}
Evaluating model: ./models_Mlm_100_Chemberta/kovlgv1s

🔍 Evaluating model folder: kovlgv1s
📦 Adapter file being loaded: training_args.bin


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mlm_100_Chemberta/kovlgv1s: {'eval_mcc_metric': 0.7320377251817916, 'eval_loss': 0.1969209909439087, 'eval_Accuracy': 0.8598047914818101, 'eval_AUC-ROC': 0.9525929438784001, 'eval_Precision': 0.6922687690070904, 'eval_Recall': 0.7125012345914886, 'eval_F1-score': 0.6906020488842198, 'eval_runtime': 2.9804, 'eval_samples_per_second': 756.274, 'eval_steps_per_second': 6.039}
Evaluating model: ./models_Mlm_100_Chemberta/k0sfrn0p

🔍 Evaluating model folder: k0sfrn0p
📦 Adapter file being loaded: training_args.bin


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mlm_100_Chemberta/k0sfrn0p: {'eval_mcc_metric': 0.745377240742711, 'eval_loss': 0.18867076933383942, 'eval_Accuracy': 0.8673469387755102, 'eval_AUC-ROC': 0.9240643387138823, 'eval_Precision': 0.7169286661753869, 'eval_Recall': 0.6532359392276617, 'eval_F1-score': 0.6757555231363336, 'eval_runtime': 3.2183, 'eval_samples_per_second': 700.36, 'eval_steps_per_second': 5.593}
Evaluating model: ./models_Mlm_100_Chemberta/0yvjid5t

🔍 Evaluating model folder: 0yvjid5t
📦 Adapter file being loaded: training_args.bin


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mlm_100_Chemberta/0yvjid5t: {'eval_mcc_metric': 0.7327720416734685, 'eval_loss': 0.20283129811286926, 'eval_Accuracy': 0.8606921029281278, 'eval_AUC-ROC': 0.9453031939096954, 'eval_Precision': 0.8439377639193936, 'eval_Recall': 0.6475590019460279, 'eval_F1-score': 0.6836970646724254, 'eval_runtime': 2.9167, 'eval_samples_per_second': 772.799, 'eval_steps_per_second': 6.171}
Evaluating model: ./models_Mlm_100_Chemberta/d1wrlvdu

🔍 Evaluating model folder: d1wrlvdu
📦 Adapter file being loaded: training_args.bin


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mlm_100_Chemberta/d1wrlvdu: {'eval_mcc_metric': 0.6269658000980638, 'eval_loss': 0.24145101010799408, 'eval_Accuracy': 0.8096716947648624, 'eval_AUC-ROC': 0.8861460530494141, 'eval_Precision': 0.592236401678622, 'eval_Recall': 0.5398703314437439, 'eval_F1-score': 0.5469782734245088, 'eval_runtime': 2.9127, 'eval_samples_per_second': 773.866, 'eval_steps_per_second': 6.18}
Evaluating model: ./models_Mlm_100_Chemberta/d6a28ks6

🔍 Evaluating model folder: d6a28ks6
📦 Adapter file being loaded: training_args.bin


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mlm_100_Chemberta/d6a28ks6: {'eval_mcc_metric': 0.7142658195288292, 'eval_loss': 0.2040163278579712, 'eval_Accuracy': 0.8504880212954747, 'eval_AUC-ROC': 0.9447150623847612, 'eval_Precision': 0.6930504488037323, 'eval_Recall': 0.6671395409479339, 'eval_F1-score': 0.6786870690645163, 'eval_runtime': 2.9806, 'eval_samples_per_second': 756.232, 'eval_steps_per_second': 6.039}
Evaluating model: ./models_Mlm_100_Chemberta/3fdjf7s7

🔍 Evaluating model folder: 3fdjf7s7
📦 Adapter file being loaded: training_args.bin


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mlm_100_Chemberta/3fdjf7s7: {'eval_mcc_metric': 0.7165065043347795, 'eval_loss': 0.2090887576341629, 'eval_Accuracy': 0.8513753327417923, 'eval_AUC-ROC': 0.9391260199982971, 'eval_Precision': 0.7388555397131931, 'eval_Recall': 0.6339252090059346, 'eval_F1-score': 0.6635488189383238, 'eval_runtime': 2.9308, 'eval_samples_per_second': 769.085, 'eval_steps_per_second': 6.142}
Evaluating model: ./models_Mlm_100_Chemberta/6a8xs6i0

🔍 Evaluating model folder: 6a8xs6i0
📦 Adapter file being loaded: training_args.bin


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mlm_100_Chemberta/6a8xs6i0: {'eval_mcc_metric': 0.7352735242634216, 'eval_loss': 0.20702017843723297, 'eval_Accuracy': 0.8598047914818101, 'eval_AUC-ROC': 0.907016671296347, 'eval_Precision': 0.8399836132368446, 'eval_Recall': 0.6510461399244443, 'eval_F1-score': 0.6785944801507757, 'eval_runtime': 3.2219, 'eval_samples_per_second': 699.588, 'eval_steps_per_second': 5.587}
Evaluating model: ./models_Mlm_100_Chemberta/a99qvoss

🔍 Evaluating model folder: a99qvoss
📦 Adapter file being loaded: training_args.bin


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mlm_100_Chemberta/a99qvoss: {'eval_mcc_metric': 0.733985401320564, 'eval_loss': 0.19938620924949646, 'eval_Accuracy': 0.8606921029281278, 'eval_AUC-ROC': 0.9334784404298844, 'eval_Precision': 0.8489034919817877, 'eval_Recall': 0.6464780594862665, 'eval_F1-score': 0.6796664281542979, 'eval_runtime': 2.9052, 'eval_samples_per_second': 775.856, 'eval_steps_per_second': 6.196}
Evaluating model: ./models_Mlm_100_Chemberta/q6ab3hnx

🔍 Evaluating model folder: q6ab3hnx
📦 Adapter file being loaded: training_args.bin


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mlm_100_Chemberta/q6ab3hnx: {'eval_mcc_metric': 0.7463880380697544, 'eval_loss': 0.1934467852115631, 'eval_Accuracy': 0.8673469387755102, 'eval_AUC-ROC': 0.9471037509253183, 'eval_Precision': 0.8515282295264454, 'eval_Recall': 0.6568764134754077, 'eval_F1-score': 0.6912447962506661, 'eval_runtime': 2.9537, 'eval_samples_per_second': 763.117, 'eval_steps_per_second': 6.094}
Evaluating model: ./models_Mlm_100_Chemberta/jbxogsm2

🔍 Evaluating model folder: jbxogsm2
📦 Adapter file being loaded: training_args.bin


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mlm_100_Chemberta/jbxogsm2: {'eval_mcc_metric': 0.7455775650378911, 'eval_loss': 0.20152515172958374, 'eval_Accuracy': 0.8646850044365573, 'eval_AUC-ROC': 0.9270770721930675, 'eval_Precision': 0.6816616896853674, 'eval_Recall': 0.694608822153824, 'eval_F1-score': 0.6822061180937639, 'eval_runtime': 3.1684, 'eval_samples_per_second': 711.393, 'eval_steps_per_second': 5.681}
Evaluating model: ./models_Mlm_100_Chemberta/gi5kecmi

🔍 Evaluating model folder: gi5kecmi
📦 Adapter file being loaded: training_args.bin


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mlm_100_Chemberta/gi5kecmi: {'eval_mcc_metric': 0.7416033240064804, 'eval_loss': 0.18928112089633942, 'eval_Accuracy': 0.8646850044365573, 'eval_AUC-ROC': 0.9494673914241305, 'eval_Precision': 0.7175521682937753, 'eval_Recall': 0.6539430110345903, 'eval_F1-score': 0.6754301456803111, 'eval_runtime': 3.1611, 'eval_samples_per_second': 713.04, 'eval_steps_per_second': 5.694}
Evaluating model: ./models_Mlm_100_Chemberta/u9d6q2lo

🔍 Evaluating model folder: u9d6q2lo
📦 Adapter file being loaded: training_args.bin


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mlm_100_Chemberta/u9d6q2lo: {'eval_mcc_metric': 0.7167517251598591, 'eval_loss': 0.2149065136909485, 'eval_Accuracy': 0.8504880212954747, 'eval_AUC-ROC': 0.9020879677136187, 'eval_Precision': 0.722033614061327, 'eval_Recall': 0.6381268330734974, 'eval_F1-score': 0.658734379036996, 'eval_runtime': 2.9835, 'eval_samples_per_second': 755.483, 'eval_steps_per_second': 6.033}
Evaluating model: ./models_Mlm_100_Chemberta/wt5mgigm

🔍 Evaluating model folder: wt5mgigm
📦 Adapter file being loaded: training_args.bin


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mlm_100_Chemberta/wt5mgigm: {'eval_mcc_metric': 0.7556197394027053, 'eval_loss': 0.18587109446525574, 'eval_Accuracy': 0.8708961845607809, 'eval_AUC-ROC': 0.9508821803277767, 'eval_Precision': 0.724016556454012, 'eval_Recall': 0.6982829700966494, 'eval_F1-score': 0.7098701024520846, 'eval_runtime': 3.237, 'eval_samples_per_second': 696.318, 'eval_steps_per_second': 5.561}
Evaluating model: ./models_Mlm_100_Chemberta/532smrg2

🔍 Evaluating model folder: 532smrg2
📦 Adapter file being loaded: training_args.bin


  trainer = CustomTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Test Results for ./models_Mlm_100_Chemberta/532smrg2: {'eval_mcc_metric': 0.7463711208398681, 'eval_loss': 0.199150949716568, 'eval_Accuracy': 0.8655723158828749, 'eval_AUC-ROC': 0.9486051174719886, 'eval_Precision': 0.7437086216358899, 'eval_Recall': 0.7269904582435949, 'eval_F1-score': 0.7321706546551738, 'eval_runtime': 3.2236, 'eval_samples_per_second': 699.208, 'eval_steps_per_second': 5.584}
Evaluating model: ./models_Mlm_100_Chemberta/gbyjoy6e

🔍 Evaluating model folder: gbyjoy6e
📦 Adapter file being loaded: training_args.bin


Test Results for ./models_Mlm_100_Chemberta/gbyjoy6e: {'eval_mcc_metric': 0.7299963331063521, 'eval_loss': 0.2147645652294159, 'eval_Accuracy': 0.8553682342502218, 'eval_AUC-ROC': 0.8971433581892038, 'eval_Precision': 0.7331086934361826, 'eval_Recall': 0.6519809960114861, 'eval_F1-score': 0.6698237032430857, 'eval_runtime': 2.9481, 'eval_samples_per_second': 764.549, 'eval_steps_per_second': 6.106}


## Best of all Models

### Best of 77M MLM Model: 

'eval_mcc_metric': 0.743591092208974, 'eval_loss': 0.19872386753559113, 'eval_model_preparation_time': 0.0038, 'eval_Accuracy': 0.8646850044365573, 'eval_AUC-ROC': 0.9008098793247992, 'eval_Precision': 0.8414375896683746, 'eval_Recall': 0.6564035372933563, 'eval_F1-score': 0.6835677143059256,

### Best of 10M MLM Model: 

{'eval_mcc_metric': 0.7783196473274908, 'eval_loss': 0.15798771381378174, 'eval_Accuracy': 0.883318544809228, 'eval_AUC-ROC': 0.9487068975033708, 'eval_Precision': 0.7174260972868147, 'eval_Recall': 0.7161449075205971, 'eval_F1-score': 0.714283627299657}

### Best of 77M MTR

'eval_mcc_metric': 0.7882497847517572, 'eval_loss': 0.14135079085826874, 'eval_Accuracy': 0.8873114463176575, 'eval_AUC-ROC': 0.9720325211171271, 'eval_Precision': 0.8327510094500526, 'eval_Recall': 0.8274682544764185, 'eval_F1-score': 0.8297152462405265


### Best of 10M MTR
'eval_mcc_metric': 0.8020882141160608, 'eval_loss': 0.1354757696390152, 'eval_model_preparation_time': 0.0055, 'eval_Accuracy': 0.8948535936113576, 'eval_AUC-ROC': 0.9712263554530509, 'eval_Precision': 0.8163329237327627, 'eval_Recall': 0.8065458720498409, 'eval_F1-score': 0.8099227080402003,

'eval_mcc_metric': 0.7903027900598758, 'eval_loss': 0.13793590664863586, 'eval_model_preparation_time': 0.0043, 'eval_Accuracy': 0.888642413487134, 'eval_AUC-ROC': 0.9712670821306159, 'eval_Precision': 0.8297460295827033, 'eval_Recall': 0.7985800549192325, 'eval_F1-score': 0.8120801935825286,

### Best of 5M MTR

'eval_mcc_metric': 0.7922832370919513, 'eval_loss': 0.13667497038841248, 'eval_model_preparation_time': 0.0039, 'eval_Accuracy': 0.8895297249334516, 'eval_AUC-ROC': 0.958335401296123, 'eval_Precision': 0.767476398219038, 'eval_Recall': 0.7331228786790375, 'eval_F1-score': 0.7469094345185043

###  With the model chemberta_v1_ the performance was worse relatively

## Load and Merge the best model

In [62]:
base_model = AutoModelForSequenceClassification.from_pretrained(
    'DeepChem/ChemBERTa-10M-MLM',
    num_labels=5,
    problem_type="single_label_classification",    
    trust_remote_code=True,
    
)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at DeepChem/ChemBERTa-10M-MLM and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [63]:
from peft import PeftModel  

adapter_model = PeftModel.from_pretrained(base_model, "/home/raghvendra2/Molformer_Finetuning/models_Mlm_10_Chemberta/ad5xf1ya/checkpoint-1500")



In [64]:
final_model_chemberta_10M_MLM= adapter_model.merge_and_unload()

### Save model to Chemberta finetuned model lora 100M MTR

In [6]:
save_path = "/home/raghvendra2/Molformer_Finetuning/chemberta_final_model_lora_100M_MTR"

final_model_chemberta_77M_MTR.save_pretrained(save_path)



### Save Model for 77M MLM

In [49]:
save_path = "/home/raghvendra2/Molformer_Finetuning/chemberta_final_model_lora_77M_MLM"

final_model_chemberta_77M_MLM.save_pretrained(save_path) 

### save Model for 10M MLM

In [65]:
save_path = "/home/raghvendra2/Molformer_Finetuning/chemberta_final_model_lora_10M_MLM"

final_model_chemberta_10M_MLM.save_pretrained(save_path)

### Save Model for 10M MTR

In [None]:
save_path = "/home/raghvendra2/Molformer_Finetuning/chemberta_final_model_lora_10M_MTR"

final_model_chemberta_10M_MTR.save_pretrained(save_path)

### Save model for 5M MTR

In [21]:
save_path = "/home/raghvendra2/Molformer_Finetuning/chemberta_final_model_lora_5M_MTR"

final_model_chemberta_5M_MTR.save_pretrained(save_path)

In [7]:
# Count total parameters
total_params = sum(p.numel() for p in final_model_chemberta_77M_MTR.parameters())
# Count trainable parameters
trainable_params = sum(p.numel() for p in final_model_chemberta_77M_MTR.parameters() if p.requires_grad)

print(f"Total Parameters: {total_params:,}")
print(f"Trainable Parameters: {trainable_params:,}")

Total Parameters: 3,429,365
Trainable Parameters: 149,765


In [39]:
total_params = sum(p.numel() for p in base_model.parameters())
print(total_params)

83454725
