In [None]:
!pip install setfit

In [None]:
from setfit import SetFitModel
from datasets import load_dataset
from setfit import Trainer
from setfit import TrainingArguments
from setfit import sample_dataset
from sklearn.metrics import f1_score
from setfit import SetFitModel
from typing import Dict, Any
from optuna import Trial
from typing import Dict, Union

In [None]:
def f1_score_func(preds, labels):
    """Compute the macro-averaged F1 score of predictions against references.

    Args:
        preds: Predicted labels (forwarded to ``f1_score`` as ``y_pred``).
        labels: Reference labels (forwarded to ``f1_score`` as ``y_true``).

    Returns:
        A single-entry dict ``{"f1": <macro F1 score>}``.
    """
    # The original literal listed the "f1" key twice; one entry is equivalent.
    return {"f1": f1_score(labels, preds, average="macro")}

In [None]:
# Load the training CSV; the rows are later accessed via dataset["train"].
dataset = load_dataset("csv", data_files="df_train.csv")
# Expose the target as "label" by renaming the column rather than copying it
# row by row through a Python lambda and then dropping the original.
dataset = dataset.rename_column("sentiment", "label")
# Drop the columns that are not used afterwards ("sentiment" no longer exists
# after the rename, so it is not listed here).
dataset = dataset.remove_columns(["Unnamed: 0", "id"])

In [None]:
# Load the validation CSV; the rows are later accessed via dataset_val["train"].
dataset_val = load_dataset("csv", data_files="df_val.csv")
# Expose the target as "label" by renaming the column rather than copying it
# row by row through a Python lambda and then dropping the original.
dataset_val = dataset_val.rename_column("sentiment", "label")
# Drop the columns that are not used afterwards ("sentiment" no longer exists
# after the rename, so it is not listed here).
dataset_val = dataset_val.remove_columns(["Unnamed: 0", "id"])

In [None]:
# Configuration for the classification head: three output features.
head_config = {"out_features": 3}

# Load the pretrained checkpoint and request a differentiable head built
# from the configuration above.
model = SetFitModel.from_pretrained(
    "intfloat/multilingual-e5-large",
    head_params=head_config,
    use_differentiable_head=True,
)

In [None]:
# Training hyperparameters handed to the Trainer; the seed is fixed so that
# repeated runs start from the same random state.
args = TrainingArguments(batch_size=32, num_epochs=1, num_iterations=2, seed=12)

In [None]:
# Wire the model, hyperparameters, data and metric together.
# Both frames are indexed with "train": the validation rows come from the
# separately loaded dataset_val, not from a second key of dataset.
trainer = Trainer(
    model=model,
    args=args,
    metric=f1_score_func,
    train_dataset=dataset["train"],
    eval_dataset=dataset_val["train"],
)

In [None]:
# Run training with the trainer configured from `model`, `args` and dataset["train"].
trainer.train()

In [None]:
# Evaluate on the trainer's eval_dataset using f1_score_func; as the last
# expression of the cell, the returned value is displayed as the cell output.
trainer.evaluate()

In [None]:
import pandas as pd

# Read the tab-separated hold-back file; the raw frame keeps its own name so
# it is not overwritten by the trimmed output frame below.
holdback = pd.read_csv("holdback.tsv", sep="\t")

# Predict on the "text" column, passed to the model as a plain Python list.
list_pred = model.predict(holdback["text"].to_list())

# The predictions are moved to the CPU and converted to a NumPy array before
# being stored (assumes predict() returns a torch tensor, as .cpu() implies).
holdback["sentiment"] = list_pred.cpu().numpy()

# Keep only the prediction and the id, in that column order.
df = holdback[["sentiment", "id"]]

# NOTE(review): to_csv also writes the row index as an unnamed first column
# here; confirm that whoever consumes this file expects it.
df.to_csv("predictions_schwager_1426515", sep="\t")