In [None]:
# Uncomment the lines below to install the required packages (versions are pinned)
#!pip install datasets==2.11
#!pip install setfit==1.0.3
#!pip install loguru==0.6.0

In [25]:
from datasets import load_dataset
from datetime import date
from loguru import logger

from setfit import TrainingArguments, Trainer, SetFitModel
from huggingface_hub import notebook_login


In [None]:
# Notebook-wide configuration: everything a reader might want to tune lives here.

# Sentence-transformer checkpoint that is loaded and then fine-tuned.
TRAINING_MODEL = "sentence-transformers/paraphrase-mpnet-base-v2"

# NOTE(review): presumably a hold-out fraction; it is not referenced by any
# cell visible in this notebook -- confirm whether it is still needed.
TEST_SIZE = 0.15

# Hyper-parameters handed to the trainer (see `model_finetuning`).
Training_Arguments = TrainingArguments(
    batch_size=36,
    num_epochs=1,
    seed=123,
)

### 1. Load a sample dataset

In [None]:
def data_load(dataset_name="SetFit/SentEval-CR", samples_per_class=8, num_classes=2, seed=42):
    """
    Loads the dataset from Datasets Library and builds a small few-shot training split.

    The training split is shuffled with ``seed`` and the first
    ``samples_per_class * num_classes`` rows are kept (16 with the defaults).
    The test split is returned unchanged.

    NOTE: shuffle + select fixes only the *total* number of rows; it does not
    look at the labels, so the per-class counts are not guaranteed to be equal.
    NOTE(review): setfit ships a ``sample_dataset`` helper intended for exact
    per-class sampling -- consider switching if strict balance matters.

    Parameters
    ----------
    dataset_name : str
        Identifier forwarded to ``load_dataset``.
    samples_per_class : int
        Target number of training rows per class (used only to size the budget).
    num_classes : int
        Number of classes used to size the budget.
    seed : int
        Seed forwarded to ``shuffle`` so the selection is reproducible.

    Returns
    -------
    tuple
        ``(train_ds, test_ds)``: the sub-sampled train split and the full test split.

    Raises
    ------
    ValueError
        If ``samples_per_class`` or ``num_classes`` is smaller than 1.
    """
    if samples_per_class < 1 or num_classes < 1:
        raise ValueError("samples_per_class and num_classes must both be >= 1")

    # load the dataset from datasets library
    dataset = load_dataset(dataset_name)
    train_split = dataset["train"]

    # Total few-shot budget, clamped so a small train split does not make
    # `select` fail on out-of-range indices.
    n_train = min(samples_per_class * num_classes, len(train_split))
    train_ds = train_split.shuffle(seed=seed).select(range(n_train))

    test_ds = dataset["test"]
    return train_ds, test_ds

### 2. Fine-tune a Hugging Face SetFit model

In [None]:
def model_finetuning(MODEL, TrainingArguments, Train_Data, Val_Data):
    """
    Fine-tunes the specified model on the provided training data and evaluates it on the validation data.

    Steps, in order: load the base checkpoint, train, save the trained model
    to a date-stamped directory, evaluate, and log the metrics.

    Parameters
    ----------
    MODEL
        Identifier passed to ``SetFitModel.from_pretrained``.
    TrainingArguments
        Training-arguments *instance* forwarded to ``Trainer(args=...)``.
        Inside this function the name shadows the ``TrainingArguments`` class
        imported at the top of the notebook; the parameter name is kept so
        existing callers are unaffected.
    Train_Data
        Dataset passed to the trainer as ``train_dataset``.
    Val_Data
        Dataset passed to the trainer as ``eval_dataset``.

    Returns
    -------
    tuple
        ``(trainer, metrics)``: the trainer after training and whatever
        ``trainer.evaluate()`` returned.

    Side effects
    ------------
    Calls ``save_pretrained`` on the trained model with the directory name
    ``<YYYYMMDD>-reviews-text-classification`` (today's date).
    """
    # load the pretrained base checkpoint that will be fine-tuned
    model = SetFitModel.from_pretrained(MODEL)

    # wire the model, training arguments and datasets into the trainer
    trainer = Trainer(
        model=model,
        args=TrainingArguments,
        train_dataset=Train_Data,
        eval_dataset=Val_Data,
    )

    logger.info('fine-tuning the Setfit model on dataset')
    # launch the fine-tuning
    trainer.train()

    logger.info('saving the fine-tuned model')
    model_directory_timestamp = f'{date.today().strftime("%Y%m%d")}-reviews-text-classification'
    # save the model locally
    trainer.model.save_pretrained(model_directory_timestamp)

    # evaluate the model on the evaluation dataset given to the trainer
    metrics = trainer.evaluate()
    # The original message had a stray leading quote and a stray " ," before the metrics.
    logger.info(f"Performance of fine-tuned model: {metrics}")
    return trainer, metrics

In [None]:
def main():
    """
    Runs the whole pipeline once: load the data, then fine-tune and evaluate.

    Uses the notebook-level ``TRAINING_MODEL`` and ``Training_Arguments``
    settings. The second dataset returned by ``data_load`` is passed on as the
    evaluation data.

    Returns
    -------
    tuple
        ``(trainer, metrics)`` exactly as produced by ``model_finetuning``.
    """
    few_shot_train, held_out = data_load()
    fitted_trainer, eval_metrics = model_finetuning(
        TRAINING_MODEL,
        Training_Arguments,
        few_shot_train,
        held_out,
    )
    return fitted_trainer, eval_metrics

In [None]:
# Run the pipeline and keep the results as notebook-level names:
# `trainer` is used again by the push-to-hub cell further down.
if __name__== "__main__":
    trainer, metrics = main()

### 3. Push the fine-tuned model to hub

In [None]:
# Execute this cell and paste an access token generated from your Hugging Face account
notebook_login()

In [None]:
# Pass the target repo id in the form "<user-or-organisation>/<model-name>";
# replace the value below with your own namespace before running.
# `trainer` is the object returned by main() in the cell above.
trainer.push_to_hub("sultanaw/sample-finetuned-model")  