## Clintox

## Clintox Molformer LoRA

In [None]:
import torch

# Release cached-but-unused GPU memory held by PyTorch before the model is loaded.
torch.cuda.empty_cache()

### Loading dataset

In [None]:
import pandas as pd

# Load the pre-split ClinTox training and validation CSVs.
# NOTE(review): these are absolute, user-specific paths; the notebook will not
# run on another machine without editing them.
train_clin=pd.read_csv('/home/raghvendra2/Molformer_Finetuning/clintox_train.csv')
val_clin=pd.read_csv('/home/raghvendra2/Molformer_Finetuning/clintox_valid.csv')

In [None]:

# Drop the 'FDA_APPROVED' and 'smiles' columns, leaving only the remaining
# label column(s) of the training frame (CT_TOX in the three-column frame
# previewed below). Used later to summarise the label distribution.
clin_sub=train_clin.drop(['FDA_APPROVED','smiles'],axis=1)

In [None]:
# Sanity check: (rows, columns) of the training split.
train_clin.shape

(1185, 3)

In [None]:
# Preview the first rows of the training split.
train_clin.head()

Unnamed: 0,smiles,FDA_APPROVED,CT_TOX
0,[C@@H]1([C@@H]([C@@H]([C@H]([C@@H]([C@@H]1Cl)C...,1,0
1,[H]/[NH+]=C(/C1=CC(=O)/C(=C\C=c2ccc(=C([NH3+])...,1,0
2,[H]/[NH+]=C(\N)/c1ccc(cc1)OCCCCCOc2ccc(cc2)/C(...,1,0
3,[N+](=O)([O-])[O-],1,0
4,[NH4][Pt]([NH4])(Cl)Cl,1,0


### Checking for Null values

In [None]:
# Count missing values per column in the validation split.
# NOTE(review): only val_clin is checked here; train_clin is not.
val_clin.isna().sum()

smiles          0
FDA_APPROVED    0
CT_TOX          0
dtype: int64

## Loading Tokenizer and Classification Model

In [None]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Load the MoLFormer tokenizer from the Hugging Face Hub.
# NOTE(review): trust_remote_code=True executes Python code shipped with the
# Hub repository; consider pinning a `revision` so that code cannot change
# between runs.
tokenizer_clin = AutoTokenizer.from_pretrained(
    "ibm/MoLFormer-XL-both-10pct",
    trust_remote_code=True
)

# Load the same checkpoint with a classification head configured for
# single-label classification over two labels. The classifier weights are not
# in the checkpoint and are newly initialised (see the warning printed below).
model_clin = AutoModelForSequenceClassification.from_pretrained(
    "ibm/MoLFormer-XL-both-10pct",
    num_labels=2,
    problem_type="single_label_classification",    
    trust_remote_code=True
)



Some weights of MolformerForSequenceClassification were not initialized from the model checkpoint at ibm/MoLFormer-XL-both-10pct and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.dense2.bias', 'classifier.dense2.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
import numpy as np

# Per-label positive counts and their share of the training set.
# Assumes clin_sub holds only 0/1 label columns, so a column sum is the number
# of positive samples for that label.
label_counts = clin_sub.sum(axis=0)
total_samples = clin_sub.shape[0]

label_distribution = label_counts / total_samples

# Iterate over the values directly. `label_counts[i]` with an integer key on a
# string-labelled Series relies on pandas' deprecated positional fallback,
# which emits a FutureWarning today and is slated to become a label lookup.
for i, (count, freq) in enumerate(zip(label_counts, label_distribution)):
    print(f"Label {i}: {count} positive samples ({freq:.2%} of total)")


Label 0: 91 positive samples (7.68% of total)


  print(f"Label {i}: {label_counts[i]} positive samples ({freq:.2%} of total)")


### Inspecting the Model Architecture

In [None]:
# Print the model's module hierarchy for inspection.
print(model_clin)

### Preparing the Training and Validation Datasets

In [None]:
# Tokenize the SMILES strings of both splits.
# No padding or truncation arguments are passed here.
# NOTE(review): presumably batches are padded later by the Trainer's collator
# using the tokenizer — confirm.
smiles_list_clin = train_clin['smiles'].tolist()
smiles_val_clin=val_clin['smiles'].tolist()
train_tokenized_clin=tokenizer_clin(smiles_list_clin)
val_tokenized_clin=tokenizer_clin(smiles_val_clin)


In [None]:
# Display the tokenizer output for the training split.
# NOTE(review): this renders every tokenized sequence; consider previewing a
# few entries instead to keep the notebook output small.
train_tokenized_clin

In [None]:
from datasets import Dataset
# Wrap the tokenizer outputs as `datasets.Dataset` objects
# (columns: input_ids, attention_mask).
train_dataset_clin = Dataset.from_dict(train_tokenized_clin)
val_dataset_clin = Dataset.from_dict(val_tokenized_clin)

# Show the dataset summary (features and row count).
train_dataset_clin

Dataset({
    features: ['input_ids', 'attention_mask'],
    num_rows: 1185
})

In [None]:
# Toxicity targets, selected by name from the CT_TOX column of each split.
train_labels_clin = train_clin['CT_TOX'].tolist()
val_labels_clin = val_clin['CT_TOX'].tolist()

In [None]:
# Attach the targets as a "labels" column on both datasets.
# NOTE(review): the datasets are reassigned under the same names, so re-running
# this cell alone would try to add "labels" a second time — re-run the
# Dataset.from_dict cell first.
train_dataset_clin = train_dataset_clin.add_column("labels", train_labels_clin)
val_dataset_clin = val_dataset_clin.add_column("labels", val_labels_clin)
train_dataset_clin

Dataset({
    features: ['input_ids', 'attention_mask', 'labels'],
    num_rows: 1185
})

## Applying LoRA Finetuning

In [None]:
from peft import LoraConfig, get_peft_model

# LoRA adapter configuration for the sequence-classification model.
lora_config = LoraConfig(
    task_type="SEQ_CLS",  # Sequence classification task
    r=64,  # Rank of LoRA matrices
    lora_alpha=16,  # Scaling factor. NOTE(review): this is r/4, not the "alpha = 2 * r" rule of thumb the earlier comment cited
    target_modules='all-linear',  # PEFT shorthand for attaching adapters to the model's linear layers
    lora_dropout=0  # Dropout rate
    #init_lora_weights="gaussian"
)

# Wrap the base model with the LoRA adapters.
model_train = get_peft_model(model_clin, lora_config)

# TODO: change the target_modules

In [None]:
# Report how many parameters are trainable after wrapping the model with LoRA.
model_train.print_trainable_parameters()

trainable params: 8,506,498 || all params: 54,310,148 || trainable%: 15.6628


### Defining Training Arguments

In [None]:
from transformers import Trainer, TrainingArguments

# Training configuration for the single (non-sweep) LoRA fine-tuning run.
# Evaluation and checkpointing both happen once per epoch so that
# load_best_model_at_end can restore the checkpoint with the highest MCC.
training_args = TrainingArguments(
    output_dir="./results_clin",
    eval_strategy="epoch",  # replaces the deprecated `evaluation_strategy` keyword
    learning_rate=1.4628449108931757e-05,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=30,
    weight_decay=0.01,
    save_strategy="epoch",
    logging_dir="./logs_clin",
    logging_strategy="steps",
    logging_steps=100,
    report_to="tensorboard",
    save_total_limit=1,
    load_best_model_at_end=True,
    metric_for_best_model="eval_mcc_metric",  # key returned by compute_metrics
    greater_is_better=True,
    # PEFT wrappers hide the base model's forward signature, so the Trainer
    # cannot infer the label column by itself; name it explicitly.
    label_names=["labels"],
    #push_to_hub=True,  # Automatically push to Hugging Face Hub
    #hub_model_id="HarshaH21/LoRA_Tox21",  # Replace with your Hub model name
)
# TODO: scheduler



### Defining Evaluation Metrics

In [None]:
from evaluate import load
import numpy as np
from scipy.special import softmax
from sklearn.metrics import roc_auc_score, precision_score, recall_score, f1_score,matthews_corrcoef

accuracy_metric = load("accuracy")
# NOTE(review): mcc_metric is loaded but not used below; MCC is computed with
# scikit-learn's matthews_corrcoef instead.
mcc_metric= load("matthews_correlation")

def compute_metrics(eval_pred):
    """Compute binary-classification metrics for one evaluation pass.

    Args:
        eval_pred: pair ``(logits, labels)``; the logits are indexed along
            axis 1 by class, with column 1 treated as the positive class.

    Returns:
        dict with keys "eval_mcc_metric", "Accuracy", "AUC-ROC", "Precision",
        "Recall" and "F1-score".
    """
    logits, labels = eval_pred

    # Positive-class probability drives AUC-ROC; every other metric uses the
    # arg-max class decision.
    positive_probs = softmax(logits, axis=1)[:, 1]
    predicted_classes = np.argmax(logits, axis=1)

    results = {}
    results["eval_mcc_metric"] = matthews_corrcoef(labels, predicted_classes)
    results["Accuracy"] = accuracy_metric.compute(
        predictions=predicted_classes, references=labels
    )["accuracy"]
    results["AUC-ROC"] = roc_auc_score(labels, positive_probs)
    results["Precision"] = precision_score(labels, predicted_classes)
    results["Recall"] = recall_score(labels, predicted_classes)
    results["F1-score"] = f1_score(labels, predicted_classes)
    return results


### Defining Class Weights and the Weighted-Loss Trainer

In [None]:
import torch

# Class weights for the weighted cross-entropy loss: each class is weighted by
# one minus its own frequency in the training labels, so the rarer class
# receives the larger weight.
_train_labels = list(train_dataset_clin['labels'])  # read the column once instead of four times
_n_train = len(_train_labels)
class_weights = [1 - (_train_labels.count(0) / _n_train),
                 1 - (_train_labels.count(1) / _n_train)]

# Prefer the GPU, but fall back to CPU instead of crashing on a machine
# without CUDA.
_weights_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
class_weights = torch.tensor(class_weights, dtype=torch.float32, device=_weights_device)
print(class_weights)

tensor([0.0768, 0.9232], device='cuda:0')


In [None]:
class WeightedLossTrainer(Trainer):
    """Trainer whose loss is class-weighted cross-entropy.

    The per-class weights are read from the module-level ``class_weights``
    tensor at each call, so that tensor must be defined before training starts.
    """

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Run a forward pass and return the weighted cross-entropy loss.

        Args:
            model: model to call with ``**inputs``.
            inputs: batch mapping; must contain "labels".
            return_outputs: when true, return ``(loss, outputs)``.
            num_items_in_batch: accepted for compatibility with the Trainer
                call signature; not used here.

        Returns:
            The scalar loss, or ``(loss, outputs)`` if ``return_outputs``.
        """
        outputs = model(**inputs)
        logits = outputs.get("logits")

        # Extract labels
        labels = inputs.get("labels")

        # The weight tensor is created once at module level; move it to
        # wherever this batch's logits live so the loss does not fail with a
        # device mismatch (no-op when the devices already agree).
        weights = class_weights.to(logits.device)
        loss_func = torch.nn.CrossEntropyLoss(weight=weights)

        # Flatten to (N, num_labels) logits against (N,) targets.
        loss = loss_func(logits.view(-1, self.model.config.num_labels), labels.view(-1))
        return (loss, outputs) if return_outputs else loss


In [None]:
# Build the trainer for the LoRA-wrapped model with the class-weighted loss.
trainer = WeightedLossTrainer(
    model=model_train,
    args=training_args,
    train_dataset=train_dataset_clin,
    eval_dataset=val_dataset_clin,
    # `tokenizer=` is deprecated in recent transformers releases (it triggers
    # the warning this cell used to emit); `processing_class=` replaces it.
    processing_class=tokenizer_clin,
    compute_metrics=compute_metrics
)
    


  trainer= WeightedLossTrainer(


No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [None]:
# Run fine-tuning, then write the model held by the trainer to ./model2_clin.
# NOTE(review): with load_best_model_at_end=True this is expected to be the
# checkpoint with the best eval_mcc_metric — confirm.
trainer.train()
trainer.save_model('./model2_clin')



Epoch,Training Loss,Validation Loss,Mcc Metric,Accuracy,Auc-roc,Precision,Recall,F1-score
1,No log,0.642303,0.262208,0.689189,0.811607,0.134615,0.875,0.233333
2,No log,0.583292,0.34253,0.878378,0.805357,0.25,0.625,0.357143
3,No log,0.541144,0.368154,0.891892,0.833036,0.277778,0.625,0.384615
4,No log,0.439541,0.331143,0.905405,0.914286,0.285714,0.5,0.363636
5,No log,0.484964,0.414774,0.912162,0.816071,0.333333,0.625,0.434783
6,0.463300,0.456614,0.530879,0.945946,0.826786,0.5,0.625,0.555556
7,0.463300,0.456558,0.471429,0.945946,0.838393,0.5,0.5,0.5
8,0.463300,0.475258,0.39792,0.905405,0.839286,0.3125,0.625,0.416667
9,0.463300,0.495006,0.564337,0.952703,0.85,0.555556,0.625,0.588235
10,0.463300,0.300211,0.36687,0.918919,0.915179,0.333333,0.5,0.4




___
___

## Hyperparameter Tuning Using Weights & Biases (wandb)

In [None]:
import wandb
from transformers import Trainer, TrainingArguments
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from evaluate import load
from datasets import Dataset
import numpy as np
import pandas as pd
import os
from scipy.special import softmax
from sklearn.metrics import roc_auc_score, precision_score, recall_score, f1_score,matthews_corrcoef


  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Authenticate this session with Weights & Biases.
# NOTE(review): the saved cell output includes the account name; clear outputs
# before sharing the notebook if that matters.
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mharodharsha21[0m ([33mharodharsha21-iit-ropar[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [None]:
def data_load(train_path='/home/raghvendra2/Molformer_Finetuning/clintox_train.csv',
              val_path='/home/raghvendra2/Molformer_Finetuning/clintox_valid.csv'):
    """Load the ClinTox training and validation splits as DataFrames.

    The second frame is used for per-epoch evaluation and model selection, so
    it must be the validation split. It previously read ``clintox_test.csv``,
    which leaked the held-out test set into hyperparameter tuning.

    Args:
        train_path: CSV file holding the training split.
        val_path: CSV file holding the validation split.

    Returns:
        Tuple ``(train_clin, val_clin)`` of pandas DataFrames.
    """
    train_clin = pd.read_csv(train_path)
    val_clin = pd.read_csv(val_path)

    return train_clin, val_clin

In [None]:
def data_prep(data_process,tokenizer_clin):
    """Turn a ClinTox DataFrame into a tokenized dataset with labels.

    Args:
        data_process: DataFrame with a 'smiles' column (inputs) and a 'CT_TOX'
            column (targets).
        tokenizer_clin: callable tokenizer applied to the list of SMILES
            strings.

    Returns:
        ``datasets.Dataset`` built from the tokenizer output, with the CT_TOX
        values attached as a "labels" column.
    """
    encodings = tokenizer_clin(data_process['smiles'].tolist())

    # Build the dataset from the encodings, then attach the targets, which are
    # taken by column name.
    return Dataset.from_dict(encodings).add_column(
        "labels", data_process['CT_TOX'].tolist()
    )





In [None]:
from peft import LoraConfig, get_peft_model, PeftModel

def lora_config(r, lora_alpha, dropout):
    """Build the LoRA adapter configuration for one hyperparameter setting.

    Args:
        r: rank of the LoRA matrices.
        lora_alpha: LoRA scaling factor.
        dropout: dropout rate applied in the LoRA layers.

    Returns:
        ``LoraConfig`` for a sequence-classification task with
        ``target_modules='all-linear'``.
    """
    adapter_kwargs = dict(
        task_type="SEQ_CLS",  # sequence classification task
        target_modules='all-linear',
        r=r,
        lora_alpha=lora_alpha,
        lora_dropout=dropout,
        # init_lora_weights="gaussian"
    )
    return LoraConfig(**adapter_kwargs)


#model_train = get_peft_model(model_clin, lora_config)

In [None]:
import torch

# Class weights for the weighted cross-entropy loss: each class is weighted by
# one minus its own frequency in the training labels, so the rarer class
# receives the larger weight.
# NOTE(review): this reads `train_dataset_clin`, which is built in the earlier
# (non-sweep) section of the notebook, so this cell cannot run on a fresh
# kernel without executing that section first.
_train_labels = list(train_dataset_clin['labels'])  # read the column once instead of four times
_n_train = len(_train_labels)
class_weights = [1 - (_train_labels.count(0) / _n_train),
                 1 - (_train_labels.count(1) / _n_train)]

# Prefer the GPU, but fall back to CPU instead of crashing on a machine
# without CUDA.
_weights_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
class_weights = torch.tensor(class_weights, dtype=torch.float32, device=_weights_device)

class WeightedLossTrainer(Trainer):
    """Trainer whose loss is class-weighted cross-entropy.

    The per-class weights are read from the module-level ``class_weights``
    tensor at each call, so that tensor must be defined before training starts.
    """

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Run a forward pass and return the weighted cross-entropy loss.

        Args:
            model: model to call with ``**inputs``.
            inputs: batch mapping; must contain "labels".
            return_outputs: when true, return ``(loss, outputs)``.
            num_items_in_batch: accepted for compatibility with the Trainer
                call signature; not used here.

        Returns:
            The scalar loss, or ``(loss, outputs)`` if ``return_outputs``.
        """
        outputs = model(**inputs)
        logits = outputs.get("logits")

        # Extract labels
        labels = inputs.get("labels")

        # The weight tensor is created once at module level; move it to
        # wherever this batch's logits live so the loss does not fail with a
        # device mismatch (no-op when the devices already agree).
        weights = class_weights.to(logits.device)
        loss_func = torch.nn.CrossEntropyLoss(weight=weights)

        # Flatten to (N, num_labels) logits against (N,) targets.
        loss = loss_func(logits.view(-1, self.model.config.num_labels), labels.view(-1))
        return (loss, outputs) if return_outputs else loss

In [None]:


# initialize wandb with sweep 
def run_training():
    """Train one LoRA-adapted MoLFormer classifier for a single W&B sweep trial.

    Reads ``lr``, ``r``, ``lora_alpha`` and ``dropout`` from the W&B run
    config (supplied by the sweep agent), fine-tunes with the class-weighted
    loss, evaluates once per epoch and saves the model held by the trainer
    after training.

    NOTE(review): every trial writes to the same ``output_dir`` and the same
    save directory, so later trials overwrite earlier ones; the directory on
    disk holds the last trial's model, not the best trial of the sweep.
    """
    run = wandb.init(project="Clintox Hyperparameter Tuning")
    config = run.config
    # Record the values that are actually used for training. The run
    # previously logged batch_size=128 while training with a per-device batch
    # size of 16, which made the W&B record misleading.
    config.batch_size = 16
    config.num_epochs = 10

    tokenizer_clin = AutoTokenizer.from_pretrained(
        "ibm/MoLFormer-XL-both-10pct",
        trust_remote_code=True
    )

    # Data: load both splits and tokenize them.
    train_data, val_data = data_load()
    training_data = data_prep(train_data, tokenizer_clin)
    validation_data = data_prep(val_data, tokenizer_clin)

    # Load a fresh model with a classification head for every trial so that
    # trials do not share weights.
    model_clin = AutoModelForSequenceClassification.from_pretrained(
        "ibm/MoLFormer-XL-both-10pct",
        num_labels=2,
        problem_type="single_label_classification",
        trust_remote_code=True
    )

    peft_config = lora_config(config.r, config.lora_alpha, config.dropout)
    lora_model = get_peft_model(model_clin, peft_config)

    training_args = TrainingArguments(
        output_dir="./results_clin_rerunmodel",
        eval_strategy="epoch",  # replaces the deprecated `evaluation_strategy` keyword
        learning_rate=config.lr,
        per_device_train_batch_size=config.batch_size,
        per_device_eval_batch_size=config.batch_size,
        num_train_epochs=config.num_epochs,
        weight_decay=0.01,
        save_strategy="epoch",
        logging_dir="./logs_clin",
        logging_strategy="steps",
        logging_steps=100,
        report_to="wandb",
        save_total_limit=1,
        load_best_model_at_end=True,
        metric_for_best_model="eval_mcc_metric",
        greater_is_better=True,  # MCC: higher is better (made explicit)
        # PEFT wrappers hide the base model's forward signature, so name the
        # label column explicitly instead of relying on inference.
        label_names=["labels"],
    )

    accuracy_metric = load("accuracy")

    def compute_metrics(eval_pred):
        """Return MCC, accuracy, AUC-ROC, precision, recall and F1 for one evaluation."""
        logits, labels = eval_pred
        probabilities = softmax(logits, axis=1)[:, 1]  # Get probabilities for class 1
        predictions = np.argmax(logits, axis=1)  # Choose the most likely class
        mcc = matthews_corrcoef(labels, predictions)

        return {
            "eval_mcc_metric": mcc,
            "Accuracy": accuracy_metric.compute(predictions=predictions, references=labels)["accuracy"],
            "AUC-ROC": roc_auc_score(labels, probabilities),  # AUC-ROC requires probabilities
            "Precision": precision_score(labels, predictions),
            "Recall": recall_score(labels, predictions),
            "F1-score": f1_score(labels, predictions)
        }

    trainer = WeightedLossTrainer(
        model=lora_model,
        args=training_args,
        train_dataset=training_data,
        eval_dataset=validation_data,
        # `tokenizer=` is deprecated in recent transformers releases;
        # `processing_class=` replaces it.
        processing_class=tokenizer_clin,
        compute_metrics=compute_metrics
    )

    trainer.train()

    wandb.finish()

    # Print the directory that is actually written; the message used to name
    # "./best_clintox_model", which is not where the model is saved.
    save_dir = "./best_clintox_model_rerunmodel"
    trainer.save_model(save_dir)
    print(f"Best model saved to {save_dir}")




### Rerunning the Model (Hyperparameter Sweep)

In [None]:
 # Define the sweep configuration
def main():
    """Create a W&B Bayesian sweep, run 10 trials and print the best trial's config.

    The sweep maximises the validation MCC and searches over learning rate,
    LoRA rank, LoRA alpha and LoRA dropout. After the agent finishes, the
    sweep's runs are fetched through the W&B API and the run with the highest
    MCC is reported.

    NOTE(review): a function with the same name is defined again in a later
    cell and shadows this one once that cell is executed.

    Raises:
        ValueError: if no run of the sweep reported the target metric.
    """
    project = "huggingface"
    # Key under which the validation MCC appears in the W&B run summary.
    metric_name = "eval/mcc_metric"

    sweep_config = {
        "name": "Clintox Hyperparameter Tuning",
        "method": "bayes",
        "metric": {
            "goal": "maximize",
            "name": metric_name
        },
        "parameters": {
            "lr": {
                "distribution": "uniform",
                "min": 1e-5,
                "max": 2e-5
            },
            "r": {
                "values": [4, 8, 16, 32, 64]
            },
            "lora_alpha": {
                "values": [8, 16, 32, 64, 128]
            },
            "dropout": {
                "values": [0.0, 0.1, 0.2]
            }
        }
    }
    sweep_id = wandb.sweep(sweep_config, project=project)
    wandb.agent(sweep_id, function=run_training, count=10)

    api = wandb.Api()
    sweep = api.sweep(f"{project}/{sweep_id}")
    print(sweep.runs[0].summary_metrics)

    scored_runs = [run for run in sweep.runs if metric_name in run.summary_metrics]
    if not scored_runs:
        raise ValueError(f"No runs found with '{metric_name}' metric.")

    # The goal is to maximise MCC, so the best run is the one with the highest
    # value. The previous ascending sort followed by [0] returned the worst run.
    best_run = max(scored_runs, key=lambda run: run.summary_metrics[metric_name])

    best_hyperparameters = best_run.config
    print(best_hyperparameters)

# In a notebook kernel __name__ is "__main__", so this guard is always true
# and the sweep starts as soon as the cell is executed.
if __name__ == "__main__":
    # NOTE(review): CUDA_VISIBLE_DEVICES is only honoured if it is set before
    # CUDA is initialised; earlier cells already create CUDA tensors, so this
    # assignment may have no effect — confirm.
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    main()

In [None]:
 # Define the sweep configuration
def main():
    """Create a W&B Bayesian sweep, run 10 trials and print the best trial's config.

    The sweep maximises the validation MCC and searches over learning rate,
    LoRA rank, LoRA alpha and LoRA dropout. After the agent finishes, the
    sweep's runs are fetched through the W&B API and the run with the highest
    MCC is reported.

    NOTE(review): this redefines the `main` from the previous cell; only one
    of the two cells needs to be run.

    Raises:
        ValueError: if no run of the sweep reported the target metric.
    """
    project = "huggingface"
    # compute_metrics returns "eval_mcc_metric", but the run summaries list the
    # value under "eval/mcc_metric". The sweep metric and the lookup below must
    # use that logged key, otherwise the optimiser has no signal and no run
    # matches the filter.
    metric_name = "eval/mcc_metric"

    sweep_config = {
        "name": "Clintox Hyperparameter Tuning",
        "method": "bayes",
        "metric": {
            "goal": "maximize",
            "name": metric_name
        },
        "parameters": {
            "lr": {
                "distribution": "uniform",
                "min": 1e-5,
                "max": 2e-5
            },
            "r": {
                "values": [4, 8, 16, 32, 64]
            },
            "lora_alpha": {
                "values": [8, 16, 32, 64, 128]
            },
            "dropout": {
                "values": [0.0, 0.1, 0.2]
            }
        }
    }
    sweep_id = wandb.sweep(sweep_config, project=project)
    wandb.agent(sweep_id, function=run_training, count=10)

    api = wandb.Api()
    sweep = api.sweep(f"{project}/{sweep_id}")
    print(sweep.runs[0].summary_metrics)

    scored_runs = [run for run in sweep.runs if metric_name in run.summary_metrics]
    if not scored_runs:
        raise ValueError(f"No runs found with '{metric_name}' metric.")

    # The goal is to maximise MCC, so the best run is the one with the highest
    # value. The previous ascending sort followed by [0] returned the worst run.
    best_run = max(scored_runs, key=lambda run: run.summary_metrics[metric_name])

    best_hyperparameters = best_run.config
    print(best_hyperparameters)

# In a notebook kernel __name__ is "__main__", so this guard is always true
# and the sweep starts as soon as the cell is executed.
if __name__ == "__main__":
    # NOTE(review): CUDA_VISIBLE_DEVICES is only honoured if it is set before
    # CUDA is initialised; earlier cells already create CUDA tensors, so this
    # assignment may have no effect — confirm.
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    main()

Create sweep with ID: morh2os6
Sweep URL: https://wandb.ai/harodharsha21-iit-ropar/huggingface/sweeps/morh2os6


[34m[1mwandb[0m: Agent Starting Run: vtced8ur with config:
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	lora_alpha: 64
[34m[1mwandb[0m: 	lr: 1.968011583193597e-05
[34m[1mwandb[0m: 	r: 64


Some weights of MolformerForSequenceClassification were not initialized from the model checkpoint at ibm/MoLFormer-XL-both-10pct and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.dense2.bias', 'classifier.dense2.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer= WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss,Mcc Metric,Accuracy,Auc-roc,Precision,Recall,F1-score
1,No log,0.25184,0.915385,0.986014,0.997041,0.923077,0.923077,0.923077
2,0.419800,0.137995,0.912871,0.986014,0.999408,1.0,0.846154,0.916667
3,0.168100,0.187971,0.912871,0.986014,0.998225,1.0,0.846154,0.916667
4,0.101500,0.205897,0.912871,0.986014,0.994675,1.0,0.846154,0.916667
5,0.101500,0.271622,0.912871,0.986014,0.995266,1.0,0.846154,0.916667
6,0.087500,0.29405,0.912871,0.986014,0.994675,1.0,0.846154,0.916667
7,0.074700,0.290475,0.912871,0.986014,0.995266,1.0,0.846154,0.916667
8,0.130100,0.364801,0.912871,0.986014,0.991124,1.0,0.846154,0.916667
9,0.130100,0.352266,0.912871,0.986014,0.994083,1.0,0.846154,0.916667
10,0.093200,0.28903,0.912871,0.986014,0.995266,1.0,0.846154,0.916667


0,1
eval/AUC-ROC,▆█▇▄▄▄▄▁▄▄
eval/Accuracy,▁▁▁▁▁▁▁▁▁▁
eval/F1-score,█▁▁▁▁▁▁▁▁▁
eval/Precision,▁█████████
eval/Recall,█▁▁▁▁▁▁▁▁▁
eval/loss,▅▁▃▃▅▆▆██▆
eval/mcc_metric,█▁▁▁▁▁▁▁▁▁
eval/runtime,▂▄▁▁▃▄▄█▂█
eval/samples_per_second,▆▄██▆▅▄▁▇▁
eval/steps_per_second,▆▄██▆▅▄▁▇▁

0,1
eval/AUC-ROC,0.99527
eval/Accuracy,0.98601
eval/F1-score,0.91667
eval/Precision,1.0
eval/Recall,0.84615
eval/loss,0.28903
eval/mcc_metric,0.91287
eval/runtime,0.5948
eval/samples_per_second,240.433
eval/steps_per_second,15.132


Best model saved to ./best_clintox_model


[34m[1mwandb[0m: Agent Starting Run: e4mkl3pg with config:
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	lora_alpha: 64
[34m[1mwandb[0m: 	lr: 1.7590566832365665e-05
[34m[1mwandb[0m: 	r: 4


Some weights of MolformerForSequenceClassification were not initialized from the model checkpoint at ibm/MoLFormer-XL-both-10pct and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.dense2.bias', 'classifier.dense2.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer= WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss,Mcc Metric,Accuracy,Auc-roc,Precision,Recall,F1-score
1,No log,0.292653,0.912871,0.986014,0.987574,1.0,0.846154,0.916667
2,0.399700,0.199045,0.912871,0.986014,0.993491,1.0,0.846154,0.916667
3,0.182100,0.266918,0.912871,0.986014,0.993491,1.0,0.846154,0.916667
4,0.136400,0.264425,0.912871,0.986014,0.991124,1.0,0.846154,0.916667
5,0.136400,0.358661,0.912871,0.986014,0.992308,1.0,0.846154,0.916667
6,0.126500,0.358785,0.912871,0.986014,0.994083,1.0,0.846154,0.916667
7,0.127200,0.406138,0.912871,0.986014,0.99645,1.0,0.846154,0.916667
8,0.185400,0.473956,0.912871,0.986014,0.987574,1.0,0.846154,0.916667
9,0.185400,0.418338,0.912871,0.986014,0.995266,1.0,0.846154,0.916667
10,0.141000,0.427414,0.86711,0.979021,0.994675,1.0,0.769231,0.869565


0,1
eval/AUC-ROC,▁▆▆▄▅▆█▁▇▇
eval/Accuracy,█████████▁
eval/F1-score,█████████▁
eval/Precision,▁▁▁▁▁▁▁▁▁▁
eval/Recall,█████████▁
eval/loss,▃▁▃▃▅▅▆█▇▇
eval/mcc_metric,█████████▁
eval/runtime,▅▂▁▃▁█▂▃▄▂
eval/samples_per_second,▃▇█▆█▁▆▆▅▇
eval/steps_per_second,▃▇█▆█▁▆▆▅▇

0,1
eval/AUC-ROC,0.99467
eval/Accuracy,0.97902
eval/F1-score,0.86957
eval/Precision,1.0
eval/Recall,0.76923
eval/loss,0.42741
eval/mcc_metric,0.86711
eval/runtime,0.4139
eval/samples_per_second,345.505
eval/steps_per_second,21.745


Best model saved to ./best_clintox_model


[34m[1mwandb[0m: Agent Starting Run: xa7l15n9 with config:
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	lora_alpha: 32
[34m[1mwandb[0m: 	lr: 1.4255936086369527e-05
[34m[1mwandb[0m: 	r: 4


Some weights of MolformerForSequenceClassification were not initialized from the model checkpoint at ibm/MoLFormer-XL-both-10pct and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.dense2.bias', 'classifier.dense2.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer= WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss,Mcc Metric,Accuracy,Auc-roc,Precision,Recall,F1-score
1,No log,0.400473,0.781627,0.965035,0.966272,0.833333,0.769231,0.8
2,0.472100,0.252105,0.912871,0.986014,0.984615,1.0,0.846154,0.916667
3,0.232800,0.181187,0.912871,0.986014,0.995266,1.0,0.846154,0.916667
4,0.148000,0.183903,0.912871,0.986014,0.992308,1.0,0.846154,0.916667
5,0.148000,0.175324,0.912871,0.986014,0.995266,1.0,0.846154,0.916667
6,0.109000,0.247986,0.912871,0.986014,0.994675,1.0,0.846154,0.916667
7,0.130800,0.303584,0.912871,0.986014,0.989349,1.0,0.846154,0.916667
8,0.159700,0.250188,0.912871,0.986014,0.991716,1.0,0.846154,0.916667
9,0.159700,0.269858,0.912871,0.986014,0.991716,1.0,0.846154,0.916667
10,0.134000,0.274461,0.912871,0.986014,0.995266,1.0,0.846154,0.916667


0,1
eval/AUC-ROC,▁▅█▇██▇▇▇█
eval/Accuracy,▁█████████
eval/F1-score,▁█████████
eval/Precision,▁█████████
eval/Recall,▁█████████
eval/loss,█▃▁▁▁▃▅▃▄▄
eval/mcc_metric,▁█████████
eval/runtime,▁▂▁▂▁▁▂█▂▆
eval/samples_per_second,█▇█▆██▇▁▇▃
eval/steps_per_second,█▇█▆██▇▁▇▃

0,1
eval/AUC-ROC,0.99527
eval/Accuracy,0.98601
eval/F1-score,0.91667
eval/Precision,1.0
eval/Recall,0.84615
eval/loss,0.27446
eval/mcc_metric,0.91287
eval/runtime,0.534
eval/samples_per_second,267.797
eval/steps_per_second,16.854


Best model saved to ./best_clintox_model


[34m[1mwandb[0m: Agent Starting Run: ffuiqzlt with config:
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	lora_alpha: 16
[34m[1mwandb[0m: 	lr: 1.0192515990124136e-05
[34m[1mwandb[0m: 	r: 32


Some weights of MolformerForSequenceClassification were not initialized from the model checkpoint at ibm/MoLFormer-XL-both-10pct and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.dense2.bias', 'classifier.dense2.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer= WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss,Mcc Metric,Accuracy,Auc-roc,Precision,Recall,F1-score
1,No log,0.492122,0.576923,0.93007,0.945562,0.615385,0.615385,0.615385
2,0.535900,0.350833,0.912871,0.986014,0.986391,1.0,0.846154,0.916667
3,0.332200,0.261779,0.912871,0.986014,0.989941,1.0,0.846154,0.916667
4,0.215100,0.22075,0.86711,0.979021,0.989941,1.0,0.769231,0.869565
5,0.215100,0.188001,0.912871,0.986014,0.992899,1.0,0.846154,0.916667
6,0.152300,0.185389,0.912871,0.986014,0.998817,1.0,0.846154,0.916667
7,0.149600,0.173991,0.912871,0.986014,0.994675,1.0,0.846154,0.916667
8,0.149800,0.205593,0.912871,0.986014,0.991716,1.0,0.846154,0.916667
9,0.149800,0.203931,0.912871,0.986014,0.992899,1.0,0.846154,0.916667
10,0.128500,0.174991,0.86711,0.979021,0.99645,1.0,0.769231,0.869565


0,1
eval/AUC-ROC,▁▆▇▇▇█▇▇▇█
eval/Accuracy,▁██▇█████▇
eval/F1-score,▁██▇█████▇
eval/Precision,▁█████████
eval/Recall,▁██▆█████▆
eval/loss,█▅▃▂▁▁▁▂▂▁
eval/mcc_metric,▁██▇█████▇
eval/runtime,▁▁▃█▃▄▁▂▂▆
eval/samples_per_second,██▆▁▅▅█▇▇▃
eval/steps_per_second,██▆▁▅▅█▇▇▃

0,1
eval/AUC-ROC,0.99645
eval/Accuracy,0.97902
eval/F1-score,0.86957
eval/Precision,1.0
eval/Recall,0.76923
eval/loss,0.17499
eval/mcc_metric,0.86711
eval/runtime,0.4745
eval/samples_per_second,301.372
eval/steps_per_second,18.967


Best model saved to ./best_clintox_model


[34m[1mwandb[0m: Agent Starting Run: dyyaz919 with config:
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	lora_alpha: 8
[34m[1mwandb[0m: 	lr: 1.097742889794737e-05
[34m[1mwandb[0m: 	r: 64


Some weights of MolformerForSequenceClassification were not initialized from the model checkpoint at ibm/MoLFormer-XL-both-10pct and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.dense2.bias', 'classifier.dense2.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer= WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss,Mcc Metric,Accuracy,Auc-roc,Precision,Recall,F1-score
1,No log,0.492905,0.693894,0.951049,0.929586,0.75,0.692308,0.72
2,0.532600,0.366981,0.771728,0.965035,0.976331,0.9,0.692308,0.782609
3,0.340800,0.267567,0.869361,0.979021,0.994675,0.916667,0.846154,0.88
4,0.228000,0.222787,0.869361,0.979021,0.991716,0.916667,0.846154,0.88
5,0.228000,0.194067,0.912871,0.986014,0.995858,1.0,0.846154,0.916667
6,0.158200,0.19558,0.912871,0.986014,0.995266,1.0,0.846154,0.916667
7,0.163600,0.215881,0.912871,0.986014,0.989349,1.0,0.846154,0.916667
8,0.166100,0.208225,0.912871,0.986014,0.991716,1.0,0.846154,0.916667
9,0.166100,0.198802,0.912871,0.986014,0.993491,1.0,0.846154,0.916667
10,0.144300,0.201015,0.912871,0.986014,0.994675,1.0,0.846154,0.916667


0,1
eval/AUC-ROC,▁▆████▇███
eval/Accuracy,▁▄▇▇██████
eval/F1-score,▁▃▇▇██████
eval/Precision,▁▅▆▆██████
eval/Recall,▁▁████████
eval/loss,█▅▃▂▁▁▂▁▁▁
eval/mcc_metric,▁▃▇▇██████
eval/runtime,▁▄█▆▁▁▁▁▂▁
eval/samples_per_second,█▄▁▃█▇██▇▇
eval/steps_per_second,█▄▁▃█▇██▇▇

0,1
eval/AUC-ROC,0.99467
eval/Accuracy,0.98601
eval/F1-score,0.91667
eval/Precision,1.0
eval/Recall,0.84615
eval/loss,0.20101
eval/mcc_metric,0.91287
eval/runtime,0.4254
eval/samples_per_second,336.16
eval/steps_per_second,21.157


Best model saved to ./best_clintox_model


[34m[1mwandb[0m: Agent Starting Run: k62x41nm with config:
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	lora_alpha: 8
[34m[1mwandb[0m: 	lr: 1.1405499373113077e-05
[34m[1mwandb[0m: 	r: 16


Some weights of MolformerForSequenceClassification were not initialized from the model checkpoint at ibm/MoLFormer-XL-both-10pct and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.dense2.bias', 'classifier.dense2.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer= WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss,Mcc Metric,Accuracy,Auc-roc,Precision,Recall,F1-score
1,No log,0.484266,0.576923,0.93007,0.951479,0.615385,0.615385,0.615385
2,0.531000,0.345565,0.869361,0.979021,0.988757,0.916667,0.846154,0.88
3,0.327900,0.2629,0.912871,0.986014,0.990533,1.0,0.846154,0.916667
4,0.219500,0.226085,0.86711,0.979021,0.988757,1.0,0.769231,0.869565
5,0.219500,0.192307,0.912871,0.986014,0.994083,1.0,0.846154,0.916667
6,0.159800,0.18984,0.912871,0.986014,0.998817,1.0,0.846154,0.916667
7,0.159600,0.178359,0.912871,0.986014,0.995858,1.0,0.846154,0.916667
8,0.157100,0.208101,0.912871,0.986014,0.992308,1.0,0.846154,0.916667
9,0.157100,0.208423,0.912871,0.986014,0.992899,1.0,0.846154,0.916667
10,0.136000,0.181416,0.86711,0.979021,0.997041,1.0,0.769231,0.869565


0,1
eval/AUC-ROC,▁▇▇▇▇██▇▇█
eval/Accuracy,▁▇█▇█████▇
eval/F1-score,▁▇█▇█████▇
eval/Precision,▁▆████████
eval/Recall,▁██▆█████▆
eval/loss,█▅▃▂▁▁▁▂▂▁
eval/mcc_metric,▁▇█▇█████▇
eval/runtime,▁▃▂█▂▂▂▁▁▁
eval/samples_per_second,▇▅▇▁▇▇▆▇██
eval/steps_per_second,▇▅▇▁▇▇▆▇██

0,1
eval/AUC-ROC,0.99704
eval/Accuracy,0.97902
eval/F1-score,0.86957
eval/Precision,1.0
eval/Recall,0.76923
eval/loss,0.18142
eval/mcc_metric,0.86711
eval/runtime,0.4017
eval/samples_per_second,356.028
eval/steps_per_second,22.407


Best model saved to ./best_clintox_model


[34m[1mwandb[0m: Agent Starting Run: otlhp516 with config:
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	lora_alpha: 16
[34m[1mwandb[0m: 	lr: 1.84113390415324e-05
[34m[1mwandb[0m: 	r: 64


Some weights of MolformerForSequenceClassification were not initialized from the model checkpoint at ibm/MoLFormer-XL-both-10pct and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.dense2.bias', 'classifier.dense2.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer= WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss,Mcc Metric,Accuracy,Auc-roc,Precision,Recall,F1-score
1,No log,0.365252,0.693894,0.951049,0.985799,0.75,0.692308,0.72
2,0.451200,0.21067,0.912871,0.986014,0.992308,1.0,0.846154,0.916667
3,0.204900,0.177729,0.912871,0.986014,0.993491,1.0,0.846154,0.916667
4,0.128100,0.170757,0.912871,0.986014,0.991124,1.0,0.846154,0.916667
5,0.128100,0.185229,0.912871,0.986014,0.995266,1.0,0.846154,0.916667
6,0.108900,0.177198,0.912871,0.986014,0.998817,1.0,0.846154,0.916667
7,0.112800,0.183044,0.912871,0.986014,0.994675,1.0,0.846154,0.916667
8,0.133900,0.238558,0.912871,0.986014,0.991124,1.0,0.846154,0.916667
9,0.133900,0.233902,0.912871,0.986014,0.992899,1.0,0.846154,0.916667
10,0.104700,0.18209,0.912871,0.986014,0.997041,1.0,0.846154,0.916667


0,1
eval/AUC-ROC,▁▄▅▄▆█▆▄▅▇
eval/Accuracy,▁█████████
eval/F1-score,▁█████████
eval/Precision,▁█████████
eval/Recall,▁█████████
eval/loss,█▂▁▁▂▁▁▃▃▁
eval/mcc_metric,▁█████████
eval/runtime,▃▁▂█▁▁▁▂▂▂
eval/samples_per_second,▆█▆▁███▇▇▇
eval/steps_per_second,▆█▆▁███▇▇▇

0,1
eval/AUC-ROC,0.99704
eval/Accuracy,0.98601
eval/F1-score,0.91667
eval/Precision,1.0
eval/Recall,0.84615
eval/loss,0.18209
eval/mcc_metric,0.91287
eval/runtime,0.4323
eval/samples_per_second,330.825
eval/steps_per_second,20.821


Best model saved to ./best_clintox_model


[34m[1mwandb[0m: Agent Starting Run: zbhs9ags with config:
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	lora_alpha: 128
[34m[1mwandb[0m: 	lr: 1.77773179415408e-05
[34m[1mwandb[0m: 	r: 64


Some weights of MolformerForSequenceClassification were not initialized from the model checkpoint at ibm/MoLFormer-XL-both-10pct and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.dense2.bias', 'classifier.dense2.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer= WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss,Mcc Metric,Accuracy,Auc-roc,Precision,Recall,F1-score
1,No log,0.196581,0.912871,0.986014,0.992308,1.0,0.846154,0.916667
2,0.332600,0.220922,0.912871,0.986014,0.991124,1.0,0.846154,0.916667
3,0.161200,0.301411,0.912871,0.986014,0.991716,1.0,0.846154,0.916667
4,0.092400,0.404559,0.912871,0.986014,0.987574,1.0,0.846154,0.916667
5,0.092400,0.459982,0.912871,0.986014,0.985799,1.0,0.846154,0.916667
6,0.088600,0.388149,0.86711,0.979021,0.989941,1.0,0.769231,0.869565
7,0.074600,0.425321,0.912871,0.986014,0.987574,1.0,0.846154,0.916667
8,0.124200,0.506492,0.912871,0.986014,0.980473,1.0,0.846154,0.916667
9,0.124200,0.491953,0.86711,0.979021,0.989349,1.0,0.769231,0.869565
10,0.093700,0.413414,0.86711,0.979021,0.994083,1.0,0.769231,0.869565


0,1
eval/AUC-ROC,▇▆▇▅▄▆▅▁▆█
eval/Accuracy,█████▁██▁▁
eval/F1-score,█████▁██▁▁
eval/Precision,▁▁▁▁▁▁▁▁▁▁
eval/Recall,█████▁██▁▁
eval/loss,▁▂▃▆▇▅▆██▆
eval/mcc_metric,█████▁██▁▁
eval/runtime,▁█▁▁▁▁▁▁▁▁
eval/samples_per_second,█▁███▇████
eval/steps_per_second,█▁███▇████

0,1
eval/AUC-ROC,0.99408
eval/Accuracy,0.97902
eval/F1-score,0.86957
eval/Precision,1.0
eval/Recall,0.76923
eval/loss,0.41341
eval/mcc_metric,0.86711
eval/runtime,0.4176
eval/samples_per_second,342.451
eval/steps_per_second,21.553


Best model saved to ./best_clintox_model


[34m[1mwandb[0m: Agent Starting Run: cok8ufd6 with config:
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	lora_alpha: 16
[34m[1mwandb[0m: 	lr: 1.688687471220676e-05
[34m[1mwandb[0m: 	r: 64


Some weights of MolformerForSequenceClassification were not initialized from the model checkpoint at ibm/MoLFormer-XL-both-10pct and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.dense2.bias', 'classifier.dense2.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer= WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss,Mcc Metric,Accuracy,Auc-roc,Precision,Recall,F1-score
1,No log,0.38961,0.781627,0.965035,0.969231,0.833333,0.769231,0.8
2,0.463400,0.248492,0.86711,0.979021,0.988757,1.0,0.769231,0.869565
3,0.228400,0.170818,0.912871,0.986014,0.995858,1.0,0.846154,0.916667
4,0.141100,0.157725,0.912871,0.986014,0.992308,1.0,0.846154,0.916667
5,0.141100,0.154103,0.912871,0.986014,0.995266,1.0,0.846154,0.916667
6,0.104700,0.193525,0.912871,0.986014,0.994675,1.0,0.846154,0.916667
7,0.114800,0.24371,0.912871,0.986014,0.991124,1.0,0.846154,0.916667
8,0.138600,0.214898,0.912871,0.986014,0.990533,1.0,0.846154,0.916667
9,0.138600,0.217667,0.912871,0.986014,0.993491,1.0,0.846154,0.916667
10,0.116000,0.22722,0.912871,0.986014,0.994083,1.0,0.846154,0.916667


0,1
eval/AUC-ROC,▁▆█▇██▇▇▇█
eval/Accuracy,▁▆████████
eval/F1-score,▁▅████████
eval/Precision,▁█████████
eval/Recall,▁▁████████
eval/loss,█▄▁▁▁▂▄▃▃▃
eval/mcc_metric,▁▆████████
eval/runtime,▃▄▁▁█▁▂▃▃▄
eval/samples_per_second,▅▅▇█▁█▆▆▆▄
eval/steps_per_second,▅▅▇█▁█▆▆▆▄

0,1
eval/AUC-ROC,0.99408
eval/Accuracy,0.98601
eval/F1-score,0.91667
eval/Precision,1.0
eval/Recall,0.84615
eval/loss,0.22722
eval/mcc_metric,0.91287
eval/runtime,0.4583
eval/samples_per_second,311.989
eval/steps_per_second,19.636


Best model saved to ./best_clintox_model


[34m[1mwandb[0m: Agent Starting Run: j5clalsu with config:
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	lora_alpha: 64
[34m[1mwandb[0m: 	lr: 1.6341311978149303e-05
[34m[1mwandb[0m: 	r: 4


Some weights of MolformerForSequenceClassification were not initialized from the model checkpoint at ibm/MoLFormer-XL-both-10pct and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.dense2.bias', 'classifier.dense2.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer= WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss,Mcc Metric,Accuracy,Auc-roc,Precision,Recall,F1-score
1,No log,0.312351,0.830769,0.972028,0.987574,0.846154,0.846154,0.846154
2,0.414200,0.191448,0.912871,0.986014,0.993491,1.0,0.846154,0.916667
3,0.183100,0.25399,0.912871,0.986014,0.993491,1.0,0.846154,0.916667
4,0.138200,0.259125,0.912871,0.986014,0.991124,1.0,0.846154,0.916667
5,0.138200,0.342877,0.912871,0.986014,0.991716,1.0,0.846154,0.916667
6,0.128400,0.343051,0.912871,0.986014,0.994083,1.0,0.846154,0.916667
7,0.129400,0.383682,0.912871,0.986014,0.995858,1.0,0.846154,0.916667
8,0.185300,0.454561,0.912871,0.986014,0.987574,1.0,0.846154,0.916667
9,0.185300,0.398464,0.912871,0.986014,0.995266,1.0,0.846154,0.916667
10,0.142500,0.409227,0.86711,0.979021,0.994675,1.0,0.769231,0.869565


0,1
eval/AUC-ROC,▁▆▆▄▄▇█▁▇▇
eval/Accuracy,▁████████▄
eval/F1-score,▁████████▃
eval/Precision,▁█████████
eval/Recall,█████████▁
eval/loss,▄▁▃▃▅▅▆█▇▇
eval/mcc_metric,▁████████▄
eval/runtime,▁▁▂▃▁▁█▂▃▆
eval/samples_per_second,██▇▆██▁▆▅▃
eval/steps_per_second,██▇▆██▁▆▅▃

0,1
eval/AUC-ROC,0.99467
eval/Accuracy,0.97902
eval/F1-score,0.86957
eval/Precision,1.0
eval/Recall,0.76923
eval/loss,0.40923
eval/mcc_metric,0.86711
eval/runtime,0.5802
eval/samples_per_second,246.457
eval/steps_per_second,15.511


Best model saved to ./best_clintox_model
{'_runtime': 122.387829053, '_step': 17, '_timestamp': 1741423869.0129378, '_wandb': {'runtime': 122}, 'eval/AUC-ROC': 0.9946745562130178, 'eval/Accuracy': 0.9790209790209792, 'eval/F1-score': 0.8695652173913043, 'eval/Precision': 1, 'eval/Recall': 0.7692307692307693, 'eval/loss': 0.4092266261577606, 'eval/mcc_metric': 0.86710996952412, 'eval/runtime': 0.5802, 'eval/samples_per_second': 246.457, 'eval/steps_per_second': 15.511, 'total_flos': 390418522567776, 'train/epoch': 10, 'train/global_step': 750, 'train/grad_norm': 0.0027433237992227077, 'train/learning_rate': 1.0894207985432869e-06, 'train/loss': 0.1425, 'train_loss': 0.18313566970825196, 'train_runtime': 117.4292, 'train_samples_per_second': 100.912, 'train_steps_per_second': 6.387}


ValueError: No runs found with 'eval_mcc_metric' metric.

## Evaluate on test Dataset

In [None]:
# Seed every random number generator this section can touch (Python, NumPy,
# PyTorch CPU and CUDA) so repeated evaluations start from the same state.
import random  # imported here so the evaluation section also runs on a fresh kernel

seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
# manual_seed_all seeds every visible GPU rather than only the current device;
# PyTorch documents it as safe to call when CUDA is unavailable.
torch.cuda.manual_seed_all(seed)

In [None]:
# Load the model with a classification head
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Rebuild the MoLFormer sequence-classification backbone that the saved adapter
# is attached to in the following cells. The classifier head created here is
# newly initialised (see the "newly initialized" warning printed by this cell).
base_model = AutoModelForSequenceClassification.from_pretrained(
    "ibm/MoLFormer-XL-both-10pct",
    # Two output classes for the binary CT_TOX target.
    num_labels=2,
    problem_type="single_label_classification",    
    # The checkpoint is loaded with its own modelling code from the Hub.
    trust_remote_code=True,
    # NOTE(review): presumably makes MoLFormer's evaluation-time forward pass
    # deterministic -- confirm against the model's configuration docs.
    deterministic_eval=True
)


  from .autonotebook import tqdm as notebook_tqdm
Some weights of MolformerForSequenceClassification were not initialized from the model checkpoint at ibm/MoLFormer-XL-both-10pct and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.dense2.bias', 'classifier.dense2.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
from peft import PeftModel

# Wrap the base model with the adapter checkpoint stored in best_clintox_w__model.
# NOTE(review): absolute, user-specific path -- this cell will not run on
# another machine without editing it.
adapter_model = PeftModel.from_pretrained(base_model, "/home/raghvendra2/Molformer_Finetuning/best_clintox_w__model")



In [None]:
from peft import PeftModel

# Alternative adapter: the checkpoint in ./best_clintox_model. Running this cell
# overwrites the `adapter_model` produced by the previous adapter cell.
# NOTE(review): every sweep run logged above printed "Best model saved to
# ./best_clintox_model", so this directory presumably holds whichever run was
# saved last rather than the best-scoring one -- verify before reporting results.
# NOTE(review): this wraps the same `base_model` object as the previous adapter
# cell; if that cell was already run, reload `base_model` first so the two
# adapters are not stacked -- TODO confirm PEFT's behaviour here.
adapter_model = PeftModel.from_pretrained(base_model, "./best_clintox_model")



In [None]:
#peft_model = adapter_model.merge_and_unload()

In [None]:
import pandas as pd

# Load the ClinTox test split; the cells below read its 'smiles' and 'CT_TOX' columns.
# NOTE(review): absolute, user-specific path -- not portable to another machine.
test_data_clin=pd.read_csv('/home/raghvendra2/Molformer_Finetuning/clintox_test.csv')

In [None]:
# Tokenizer for the same checkpoint id as the classification backbone; it is
# used below to encode the test SMILES strings.
tokenizer_clin = AutoTokenizer.from_pretrained(
    "ibm/MoLFormer-XL-both-10pct",
    trust_remote_code=True
)

In [None]:
# Plain Python list of the test-set SMILES strings.
smiles_test_clin = test_data_clin['smiles'].tolist()

# Encode all SMILES in one call; no padding or truncation arguments are passed here.
test_tokenized_clin =tokenizer_clin(smiles_test_clin)

# Build the evaluation dataset from the tokenizer output.
# `Dataset` is not imported in this section -- presumably `datasets.Dataset`
# from an earlier cell; confirm.
test_dataset_clin = Dataset.from_dict(test_tokenized_clin)

In [None]:
# Use the single CT_TOX column, selected by name, as the classification target.
test_labels_clin = test_data_clin['CT_TOX'].tolist()


# Store the targets under "labels" next to the tokenized inputs.
# NOTE(review): this reassigns test_dataset_clin from itself, so re-running the
# cell without rebuilding the dataset first tries to add "labels" a second time.
test_dataset_clin = test_dataset_clin.add_column("labels", test_labels_clin)

In [None]:
from evaluate import load
import numpy as np
from scipy.special import softmax
from sklearn.metrics import (
    f1_score,
    matthews_corrcoef,  # used by compute_metrics; was missing from this cell's imports
    precision_score,
    recall_score,
    roc_auc_score,
)

accuracy_metric = load("accuracy")


def compute_metrics(eval_pred):
    """Score binary predictions produced by the Trainer's evaluation loop.

    Args:
        eval_pred: pair ``(logits, labels)``. ``logits`` is indexed as
            ``[sample, class]`` and class 1 is treated as the positive class;
            ``labels`` holds the matching integer targets.

    Returns:
        dict with the keys ``eval_mcc_metric``, ``Accuracy``, ``AUC-ROC``,
        ``Precision``, ``Recall`` and ``F1-score``.

    Raises:
        ValueError: propagated from ``roc_auc_score`` when it cannot compute
            the score for the given labels (for example a single-class set).
    """
    logits, labels = eval_pred
    probabilities = softmax(logits, axis=1)[:, 1]  # probability of class 1
    predictions = np.argmax(logits, axis=1)  # most likely class per sample
    mcc = matthews_corrcoef(labels, predictions)

    # Per-sample dump kept from the original cell for manual inspection.
    print("prob: ",probabilities)
    print("Predictions:", predictions)
    print("Labels:", labels)

    return {
        "eval_mcc_metric": mcc,
        "Accuracy": accuracy_metric.compute(predictions=predictions, references=labels)["accuracy"],
        "AUC-ROC": roc_auc_score(labels, probabilities),  # needs probabilities, not hard labels
        # zero_division=0 keeps the same 0.0 result as the default but without
        # a warning when no sample is predicted (or labelled) positive.
        "Precision": precision_score(labels, predictions, zero_division=0),
        "Recall": recall_score(labels, predictions, zero_division=0),
        "F1-score": f1_score(labels, predictions, zero_division=0),
    }
    

In [None]:
# Evaluation-only configuration: no training hyper-parameters are set here.
training_args = TrainingArguments(
    output_dir="./test_results_clintox2",
    per_device_eval_batch_size=16,
    report_to="none",  # Disable logging to W&B for test
    seed=42,  # Ensures reproducibility
    # The Trainer warns that it cannot infer label names for the PEFT-wrapped
    # model, so name the label column explicitly; it matches the "labels"
    # column added to the test dataset.
    label_names=["labels"],
)

# Tokenizer handed to the trainer; same checkpoint id as the one used to encode
# the test SMILES.
tokenizer = AutoTokenizer.from_pretrained(
    "ibm/MoLFormer-XL-both-10pct",
    trust_remote_code=True,
)

In [None]:
# Evaluation-only trainer: no train_dataset is supplied, so it is used solely
# for trainer.evaluate() on the test set.
# NOTE(review): WeightedLossTrainer is defined earlier in the notebook; this
# assumes it forwards keyword arguments to transformers.Trainer unchanged.
trainer = WeightedLossTrainer(
    model=adapter_model,
    args=training_args,
    eval_dataset=test_dataset_clin,
    # `tokenizer=` is deprecated in recent transformers releases in favour of
    # `processing_class=`; the object passed is the same tokenizer.
    processing_class=tokenizer,
    compute_metrics=compute_metrics,
)

  trainer= WeightedLossTrainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [None]:
# Run the evaluation loop on the test dataset; compute_metrics also prints the
# per-sample probabilities, predictions and labels as a side effect.
test_results_clin = trainer.evaluate()

# NOTE(review): "model2" is only a print label -- which adapter is scored
# depends on which adapter cell was run last before the trainer was built.
print("Test Results for model2:", test_results_clin)



prob:  [0.1974876  0.18410206 0.04071056 0.06842607 0.05894638 0.19729373
 0.07677974 0.04483588 0.40389663 0.03832499 0.11893113 0.1893536
 0.06580044 0.06480221 0.03341505 0.8669206  0.17124312 0.10870148
 0.05051511 0.08069278 0.10911671 0.14440681 0.4009542  0.06409924
 0.05846755 0.1386373  0.03221663 0.98421544 0.09760652 0.1508786
 0.19772144 0.05802347 0.14473975 0.1097756  0.12167212 0.0254125
 0.3637201  0.16874947 0.1735283  0.09978637 0.16644715 0.28582984
 0.388635   0.1016976  0.08798417 0.11798432 0.16091965 0.14002986
 0.21268746 0.21100104 0.07887942 0.3085891  0.07098906 0.08430281
 0.12454053 0.16771178 0.08288874 0.06965958 0.06861016 0.03258727
 0.08207446 0.09404901 0.18429871 0.17011258 0.14170365 0.14707859
 0.11183274 0.1188296  0.0769853  0.02821218 0.06315152 0.0673309
 0.75138503 0.25719538 0.10843217 0.07390656 0.02522832 0.03075074
 0.07938734 0.01532564 0.2014002  0.20899868 0.0554406  0.12642872
 0.23929591 0.04706297 0.17177047 0.11767894 0.11745013 0.1

In [None]:
# Same evaluation call as the previous cell; it overwrites test_results_clin.
test_results_clin = trainer.evaluate()

# NOTE(review): the "wandb" label is only meaningful if `adapter_model` and
# `trainer` were rebuilt with the sweep checkpoint before this cell ran.
print("Test Results for model wandb:", test_results_clin)

prob:  [0.29021806 0.35511708 0.26867062 0.48394194 0.32066518 0.41290104
 0.2983156  0.25890774 0.57834363 0.28756273 0.35133228 0.3969228
 0.1799421  0.41608563 0.19304872 0.80524325 0.34480733 0.28109017
 0.13885628 0.22536659 0.25828618 0.2606503  0.42974025 0.24434689
 0.18997893 0.24764091 0.2358779  0.96248376 0.22911075 0.26000497
 0.31553936 0.17834651 0.332849   0.36092708 0.3036842  0.17140168
 0.4110556  0.4459567  0.28366095 0.2572276  0.4148295  0.3456944
 0.48130488 0.228825   0.25668377 0.4100538  0.2858609  0.27362218
 0.3736393  0.38951382 0.25397232 0.41562295 0.21825364 0.32607055
 0.30642816 0.21882752 0.2423174  0.20875436 0.19991313 0.12312841
 0.21218301 0.25125518 0.2918531  0.28857717 0.37183434 0.23276204
 0.25722945 0.20545036 0.19823198 0.12558277 0.20493305 0.21120866
 0.69580644 0.38969347 0.31196836 0.21623373 0.14387546 0.18100762
 0.22032158 0.15885828 0.33385623 0.31971753 0.17065942 0.18428741
 0.42039642 0.16820471 0.3765629  0.29135376 0.33355084 0