In [1]:
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer, TrainingArguments, Trainer, DataCollatorWithPadding
from peft import LoraConfig, get_peft_model
from datasets import load_from_disk
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from tqdm import tqdm
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Checkpoint identifier shared by the tokenizer and the classifier loaded later.
base_model = "intfloat/multilingual-e5-large"

# Load the tokenizer that belongs to that checkpoint, asking for the fast variant.
tokenizer = AutoTokenizer.from_pretrained(
    base_model,
    use_fast=True,
)

In [3]:
# Reload the dataset previously saved to disk; later cells read its
# 'train' and 'test' splits and its 'text' and 'label' columns.
dataset = load_from_disk("splitted_dataset/")

In [4]:
def preprocess(examples):
    """Tokenize the ``text`` column of a batch of examples.

    Args:
        examples: A batch as passed by ``dataset.map(..., batched=True)``;
            only its ``'text'`` entry is read.

    Returns:
        The tokenizer output for the batch, truncated but not padded.
    """
    # No padding here: the DataCollatorWithPadding handed to the Trainer pads
    # each mini-batch to its own longest sequence. Padding at map time would
    # instead stretch every example to the longest text in its whole map batch,
    # so most training batches would carry needless padding tokens.
    return tokenizer(examples['text'], truncation=True)

# Tokenize the dataset in batches and drop the raw "text" column afterwards.
tokenized_dataset = dataset.map(
    preprocess,
    batched=True,
    remove_columns=["text"],
)

In [5]:
# Label schema, read from the training split's "label" feature.
class_names = dataset["train"].features["label"].names
num_labels = dataset["train"].features["label"].num_classes
print(f"number of labels: {num_labels}")
print(f"the labels: {class_names}")

# Class index -> class name; passed to the model when it is loaded.
id2label = dict(enumerate(class_names))

# Collator that pads each batch and returns PyTorch tensors.
data_collator = DataCollatorWithPadding(
    tokenizer=tokenizer,
    return_tensors="pt",
)

number of labels: 2
the labels: ['0', '1']


In [6]:
def compute_metrics(pred):
    """Score predictions with accuracy and weighted precision / recall / F1.

    Args:
        pred: Object exposing ``label_ids`` (reference labels) and
            ``predictions`` (per-class scores); the argmax over the last axis
            of ``predictions`` is taken as the predicted class.

    Returns:
        dict with the keys ``'accuracy'``, ``'precision'``, ``'recall'`` and
        ``'f1'``.
    """
    y_true = pred.label_ids
    y_pred = pred.predictions.argmax(-1)

    # Precision, recall and F1 all use the same averaging mode.
    weighted = {'average': 'weighted'}
    return {
        'accuracy': accuracy_score(y_true, y_pred),
        'precision': precision_score(y_true, y_pred, **weighted),
        'recall': recall_score(y_true, y_pred, **weighted),
        'f1': f1_score(y_true, y_pred, **weighted),
    }

In [7]:
#pip install torch==2.5.1+cu124 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124

In [8]:
# best_res = {'eval_f1':0.71}

# model = AutoModelForSequenceClassification.from_pretrained(base_model, id2label=id2label, device_map='cuda')
# for init_lora_weights in ['gaussian', 'pissa', 'olora']:
#     for r in tqdm([16, 32]):
#         for lora_alpha in [32, 64]:
#             for lora_dropout in [0.05, 0.075, 0.1, 0.125, 0.15]:
#                 for learning_rate in [5e-05, 7e-05, 1e-04, 2e-04, 3e-04]:
#                     peft_config = LoraConfig(task_type="SEQ_CLS", inference_mode=False, r=r, lora_alpha=lora_alpha, lora_dropout=lora_dropout, 
#                                             init_lora_weights=init_lora_weights, #use_rslora=True, 
#                                             )
#                     peft_model = get_peft_model(model, peft_config)
    
#                     training_args = TrainingArguments(
#                         output_dir="test_trainer", 
#                         fp16=True,
#                         num_train_epochs=30,
#                         load_best_model_at_end=True,
#                         metric_for_best_model='f1',
#                         greater_is_better=True,
#                         per_device_train_batch_size = 16,
#                         per_device_eval_batch_size = 16,
#                         learning_rate = learning_rate,
#                         logging_strategy='epoch',
#                         save_strategy='epoch',
#                         eval_strategy='epoch',
#                         seed=42,
#                     )
#                     trainer = Trainer(
#                         model=peft_model,
#                         args=training_args,
#                         data_collator=data_collator,
#                         train_dataset=tokenized_dataset['train'],
#                         eval_dataset=tokenized_dataset['test'],
#                         compute_metrics=compute_metrics,
#                         #tokenizer=tokenizer
#                     )
#                     trainer.train()
#                     eval_res = trainer.evaluate()
#                     if best_res['eval_f1']<eval_res['eval_f1']:
#                         best_res=eval_res
#                         print(f'saved in f1={eval_res["eval_f1"]}, r={r}, lora_alpha={lora_alpha}, lora_dropout={lora_dropout}, learning_rate={learning_rate}', best_res)
#                         trainer.model.save_pretrained(f'model f1={eval_res["eval_f1"]}, r={r}, lora_alpha={lora_alpha}, lora_dropout={lora_dropout}, learning_rate={learning_rate}')

In [9]:
def get_params(rng=None):
    """Draw one random LoRA / learning-rate configuration for the search.

    Args:
        rng: Optional ``numpy.random.Generator`` to sample from. When omitted,
            a fresh generator seeded from OS entropy is created, so every call
            gives an independent draw that is unaffected by (and does not
            disturb) NumPy's global RNG state. Pass a seeded generator to make
            a search reproducible.

    Returns:
        Tuple ``(init_lora_weights, r, lora_alpha, lora_dropout, learning_rate)``:
        ``init_lora_weights`` is one of 'gaussian', 'pissa', 'olora' (str);
        ``r`` is one of 8, 16, 32, 64 (int); ``lora_alpha`` is one of
        16, 32, 64 (int); ``lora_dropout`` is drawn uniformly from [0, 0.15)
        and ``learning_rate`` uniformly from [5e-05, 3e-04) (both float).
    """
    if rng is None:
        # Local generator instead of np.random.seed(None): same "fresh entropy
        # on every call" effect without reseeding the process-wide RNG.
        rng = np.random.default_rng()
    init_lora_weights = str(rng.choice(['gaussian', 'pissa', 'olora']))
    r = int(rng.choice([8, 16, 32, 64]))
    # Alternative kept from the original author: only allow lora_alpha > r,
    # i.e. int(rng.choice([i for i in [16, 32, 64, 128] if i > r]))
    lora_alpha = int(rng.choice([16, 32, 64]))
    # Cast to built-in float, matching the int()/str() casts above.
    lora_dropout = float(rng.uniform(0, 0.15))
    learning_rate = float(rng.uniform(5e-05, 3e-04))
    return init_lora_weights, r, lora_alpha, lora_dropout, learning_rate

In [10]:
def get_trainer(torch_seed, min_initial_f1_sum=1.2):
    """Build a Trainer around a fresh LoRA model that passes an initial-quality gate.

    Each attempt seeds torch with ``torch_seed``, loads the base classifier,
    samples hyperparameters with ``get_params()``, wraps the model with LoRA
    and evaluates it *before any training* on the eval and the train split.
    If the two ``eval_f1`` scores sum to less than ``min_initial_f1_sum`` the
    candidate is discarded, ``torch_seed`` is incremented and a new attempt is
    made (with newly sampled hyperparameters). The loop has no upper bound.

    Args:
        torch_seed: Seed passed to ``torch.manual_seed`` for the first attempt.
        min_initial_f1_sum: Minimum value of (eval F1 + train F1) of the
            untrained model for the candidate to be accepted. Defaults to 1.2.

    Returns:
        Tuple ``(trainer, init_lora_weights, r, lora_alpha, lora_dropout,
        learning_rate, torch_seed)`` where ``torch_seed`` is the seed of the
        accepted attempt.
    """
    import gc  # local import: only needed to clean up rejected candidates

    while True:
        # Seeded before the model is created; presumably so that the randomly
        # initialised weights depend on torch_seed. Keep this ordering.
        torch.manual_seed(torch_seed)
        model = AutoModelForSequenceClassification.from_pretrained(base_model, id2label=id2label, device_map='cuda')
        init_lora_weights, r, lora_alpha, lora_dropout, learning_rate = get_params()
        #print(f'init_lora_weights={init_lora_weights}, r={r}, lora_alpha={lora_alpha}, lora_dropout={lora_dropout}, learning_rate={learning_rate}')
        peft_config = LoraConfig(task_type="SEQ_CLS", inference_mode=False, r=r, lora_alpha=lora_alpha, lora_dropout=lora_dropout, 
                            init_lora_weights=init_lora_weights, #use_rslora=True,
                            )
        peft_model = get_peft_model(model, peft_config)
    
        training_args = TrainingArguments(
            output_dir="test_trainer", 
            fp16=True,
            num_train_epochs=50,
            load_best_model_at_end=True,
            metric_for_best_model='f1',
            greater_is_better=True,
            per_device_train_batch_size = 16,
            per_device_eval_batch_size = 16,
            learning_rate = learning_rate,
            logging_strategy='epoch',
            save_strategy='epoch',
            eval_strategy='epoch',
            seed=42,
        )
        # NOTE(review): the 'test' split serves as eval set here, i.e. for this
        # gate and for best-checkpoint selection - confirm a separate held-out
        # set exists for the final score.
        trainer = Trainer(
            model=peft_model,
            args=training_args,
            data_collator=data_collator,
            train_dataset=tokenized_dataset['train'],
            eval_dataset=tokenized_dataset['test'],
            compute_metrics=compute_metrics,
            #tokenizer=tokenizer
        )
        # Score the still-untrained model on both splits.
        evals = trainer.evaluate()
        train_evals = trainer.evaluate(tokenized_dataset['train'])
        print(evals['eval_f1'], train_evals['eval_f1'])
        if evals['eval_f1']+train_evals['eval_f1']<min_initial_f1_sum:
            # Rejected: drop every reference to this candidate and release its
            # GPU memory *before* the next model is loaded; otherwise the old
            # and the new model are resident on the device at the same time.
            del trainer, peft_model, model
            gc.collect()
            torch.cuda.empty_cache()
            torch_seed+=1
            continue
        print(torch_seed)
        return trainer, init_lora_weights, r, lora_alpha, lora_dropout, learning_rate, torch_seed

In [11]:
import warnings
import logging

# Silence Python warnings for the rest of the notebook.
warnings.filterwarnings('ignore')
# Only let errors through from the transformers model-loading logger. The name
# must match the module path exactly ("transformers", not "transfomers"),
# otherwise the level is set on an unrelated logger and nothing is silenced.
logging.getLogger("transformers.modeling_utils").setLevel(logging.ERROR)

In [None]:
# Evaluation result to beat; a trial is saved only if its eval F1 exceeds it.
best_res = {'eval_f1':0.71}

torch_seed = 2411
for i in tqdm(range(600)):
    # get_trainer hands back the seed it ended on, which feeds the next trial.
    (trainer, init_lora_weights, r, lora_alpha,
     lora_dropout, learning_rate, torch_seed) = get_trainer(torch_seed)
    config_desc = f'init_lora_weights={init_lora_weights}, r={r}, lora_alpha={lora_alpha}, lora_dropout={lora_dropout}, learning_rate={learning_rate}'
    print(config_desc)

    trainer.train()
    eval_res = trainer.evaluate()
    print(eval_res)

    # New best: remember its metrics and save the model under a descriptive name.
    if eval_res['eval_f1'] > best_res['eval_f1']:
        best_res = eval_res
        print(f'saved in f1={eval_res["eval_f1"]}, {config_desc}', best_res)
        trainer.model.save_pretrained(f'model f1={eval_res["eval_f1"]}, {config_desc}')

In [None]:
# model = AutoModelForSequenceClassification.from_pretrained(base_model, id2label=id2label, device_map='cuda', trust_remote_code=True)

# peft_config = LoraConfig(task_type="SEQ_CLS", inference_mode=False, r=32, lora_alpha=64, lora_dropout=0.1, init_lora_weights='olora')
# peft_model = get_peft_model(model, peft_config)

# peft_model.print_trainable_parameters()

In [None]:
# training_args = TrainingArguments(
#     output_dir="test_trainer", 
#     fp16=True,
#     num_train_epochs=30,
#     load_best_model_at_end=True,
#     metric_for_best_model='f1',
#     greater_is_better=True,
#     per_device_train_batch_size = 16,
#     per_device_eval_batch_size = 16,
#     learning_rate = 1e-04,
#     logging_strategy='epoch',
#     save_strategy='epoch',
#     eval_strategy='epoch',
#     seed=42,
# )

In [None]:
# trainer = Trainer(
#     model=peft_model,
#     args=training_args,
#     data_collator=data_collator,
#     train_dataset=tokenized_dataset['train'],
#     eval_dataset=tokenized_dataset['test'],
#     compute_metrics=compute_metrics,
#     #tokenizer=tokenizer
# )

In [None]:
# trainer.train()

In [None]:
# Evaluate whichever `trainer` is currently bound, on its eval dataset.
# Original note: r=32, lora_alpha=64, lora_dropout=0.1 batch 16
# NOTE(review): that note matches the fixed configuration in the commented-out
# cells; with only the random-search cell active, `trainer` comes from its last
# trial - confirm which run this evaluation is meant to report.
trainer.evaluate()