In [None]:
! pip install setfit


In [2]:
from datasets import load_dataset
from sentence_transformers.losses import CosineSimilarityLoss

from setfit import SetFitModel, SetFitTrainer


In [None]:
dataset = load_dataset("Moreza009/internal_LLM_num")


In [None]:
dataset

In [5]:
# Select N examples per class (8 in this case)
train_ds = dataset["train"].shuffle(seed=42).select(range(8 * 2))
test_ds = dataset["test"]


In [None]:
# Load SetFit model from Hub
model = SetFitModel.from_pretrained("sentence-transformers/paraphrase-mpnet-base-v2")

# Create trainer
metrics_list=[]
for i in ["accuracy", "recall", "precision", "f1"]:
  trainer = SetFitTrainer(
      model=model,
      train_dataset=train_ds,
      eval_dataset=test_ds,
      loss_class=CosineSimilarityLoss,
      batch_size=16,
      metric= i ,
      num_iterations=20, # Number of text pairs to generate for contrastive learning
      num_epochs=1, # Number of epochs to use for contrastive learning
      column_mapping={"patient medical hidtory": "text", "Inhospital Mortality": "label"}
  )
  trainer.train()
  metrics = trainer.evaluate()
  metrics_list.append(metrics)

In [7]:
metrics_list

[{'accuracy': 0.734640522875817},
 {'recall': 0.8574898785425101},
 {'precision': 0.8092354277062831},
 {'f1': 0.8572513287775246}]

In [8]:
import pandas as pd

In [9]:
df = pd.DataFrame({"few_shot":metrics_list})

In [10]:
df.to_excel("internsl_few_shots_Setfit.xlsx", index=False)