In [1]:
import pandas as pd
import json
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sentence_transformers import SentenceTransformer, losses
import numpy as np
import cv2 as cv2
from tqdm.notebook import tqdm
from torch.utils.data import Dataset
from sentence_transformers.trainer import SentenceTransformerTrainer
from sentence_transformers.training_args import SentenceTransformerTrainingArguments
from sentence_transformers.evaluation import BinaryClassificationEvaluator
from huggingface_hub import HfFolder
import datasets

  from tqdm.autonotebook import tqdm, trange


In [2]:
# Load the raw catalogue and preview the first rows to inspect the available columns.
comparable_data = pd.read_csv("comparable_data.csv")
comparable_data.head(3)

Unnamed: 0,title,price,cat_1,cat_2,cat_3,caracteristics,img_ref,target,dealer
0,Беговая дорожка UNIXFIT MX-990X,120890,Беговые дорожки,UNIXFIT,UNIXFIT MX-990X,Тип электрическая Уровень базовый Габариты (...,images/begovye_dorozhki/1_begdorozhki_1349.jpeg,begovye_dorozhki,begdorozhki
1,"Беговая дорожка Proxima Ivetta HRC, Арт. PROT-219",139990,Беговые дорожки,Proxima,"Proxima Ivetta HRC, Арт. PROT-219",Тип электрическая Уровень базовый Габариты (...,images/begovye_dorozhki/2_begdorozhki_1463.jpeg,begovye_dorozhki,begdorozhki
2,"Беговая дорожка UNIXFIT MX-990 AC (10,1"" TFT)",159890,Беговые дорожки,UNIXFIT,"UNIXFIT MX-990 AC (10,1"" TFT)",Тип электрическая Уровень полупрофессиональны...,images/begovye_dorozhki/3_begdorozhki_1638.jpeg,begovye_dorozhki,begdorozhki


In [3]:
def prepare_data(path_to_df, info_used = 'title_only'):
    """Load the product CSV and reduce it to a ``label`` / ``text`` frame.

    Parameters
    ----------
    path_to_df : str or path-like
        CSV file with at least the ``title`` and ``target`` columns. The
        richer modes additionally need ``cat_1``, ``cat_2``, ``cat_3`` and
        (for the fallback mode) ``caracteristics``.
    info_used : str, default 'title_only'
        Which columns are concatenated (space separated) into ``text``:

        * ``'title_only'`` - the title alone;
        * ``'title_cat'``  - title + cat_1 + cat_2 + cat_3;
        * anything else    - title + cat_1 + cat_2 + cat_3 + caracteristics.

    Returns
    -------
    pandas.DataFrame
        Columns ``label`` (integer class id) and ``text`` (string) with a
        fresh ``0..n-1`` index. Class ids are assigned in order of first
        appearance of each ``target`` value, so they are identical on every
        run. Rows whose ``target`` is missing get the id ``-1``.

    Raises
    ------
    KeyError
        If a column required by the selected mode is absent from the CSV.
    """
    df = pd.read_csv(path_to_df)

    if info_used == 'title_only':
        text_columns = ["title"]
    elif info_used == 'title_cat':
        text_columns = ["title", "cat_1", "cat_2", "cat_3"]
    else:
        text_columns = ["title", "cat_1", "cat_2", "cat_3", "caracteristics"]

    # Build every text in a single pass instead of growing a DataFrame row by
    # row with pd.concat (which is quadratic in the number of rows). Each
    # value goes through str() in every mode, so a missing cell becomes the
    # string "nan" rather than a float that would break tokenisation later.
    column_values = [df[column] for column in text_columns]
    texts = [" ".join(map(str, values)) for values in zip(*column_values)]

    # factorize numbers the classes by first appearance; ordering taken from
    # list(set(...)) depends on string hashing and changes between runs.
    label_codes, _ = pd.factorize(df["target"])

    return pd.DataFrame({"label": label_codes, "text": texts}, columns=["label", "text"])

In [4]:
# Build the (label, text) frame; "title_cat" joins the title with cat_1, cat_2 and cat_3.
df = prepare_data("comparable_data.csv", info_used="title_cat")

  0%|          | 0/13718 [00:00<?, ?it/s]

In [5]:
# Preview the prepared frame: integer class label plus the concatenated text.
df.head(5)

Unnamed: 0,label,text
0,0,Беговая дорожка UNIXFIT MX-990X Беговые дорожк...
1,0,"Беговая дорожка Proxima Ivetta HRC, Арт. PROT-..."
2,0,"Беговая дорожка UNIXFIT MX-990 AC (10,1"" TFT) ..."
3,0,Беговая дорожка Titanium Masters Physiotech TL...
4,0,Беговая дорожка Laufstein Commercial Беговые д...


In [6]:
def create_cos_sim_data(data_df, use_all_combos=False, combos_mult=1024):
    """Turn a labelled text frame into sentence pairs with a 0/1 similarity score.

    For every row a fair coin (``np.random.randint(0, 2)``) decides whether
    the row is paired with texts of the same label (score ``1.0``) or with
    texts of a different label (score ``0.0``).

    Parameters
    ----------
    data_df : pandas.DataFrame
        Frame with a ``label`` column and a ``text`` column.
    use_all_combos : bool, default False
        If False, each row produces exactly one pair with a randomly chosen
        partner. If True, each row produces
        ``int(len(same_label_texts) / combos_mult)`` pairs: the first that
        many same-label texts for a positive row, or that many randomly drawn
        different-label texts for a negative row.
    combos_mult : int or float, default 1024
        Divisor that controls the number of pairs per row when
        ``use_all_combos`` is True.

    Returns
    -------
    pandas.DataFrame
        Columns ``sentence1``, ``sentence2`` and ``score`` with a unique
        ``0..n-1`` index. The columns are present even when no pair was
        generated.

    Raises
    ------
    ValueError
        If a negative pair has to be drawn but no text with a different
        label exists (e.g. ``data_df`` contains a single label).

    Notes
    -----
    Randomness comes from NumPy's global RNG; call ``np.random.seed`` before
    this function to obtain reproducible pairs. A positive partner is drawn
    from all texts of the label, so a text can be paired with itself.
    """
    pair_columns = ["sentence1", "sentence2", "score"]

    # For each label: the texts sharing it (positive pool) and all the other
    # texts (negative pool).
    labels_positive = {}
    labels_negative = {}
    for label in data_df.label.unique():
        same_label = data_df.label == label
        labels_positive[label] = data_df[same_label]["text"].to_numpy()
        labels_negative[label] = data_df[~same_label]["text"].to_numpy()

    def random_negative(label):
        """Draw one text whose label differs from ``label``."""
        pool = labels_negative[label]
        if len(pool) == 0:
            raise ValueError(
                "cannot draw a negative pair: no text with a label other than "
                f"{label!r} is available"
            )
        return pool[np.random.randint(0, len(pool))]

    # Collect plain dicts and build the frame once at the end; concatenating a
    # one-row DataFrame per pair is quadratic and leaves every row at index 0.
    records = []
    for _, row in tqdm(data_df.iterrows(), total=len(data_df)):
        label = row["label"]
        first = row["text"]
        positives = labels_positive[label]

        # Probability of a same-label pair is 0.5.
        is_positive = np.random.randint(0, 2) == 0

        if use_all_combos:
            n_pairs = max(int(len(positives) / combos_mult), 0)
            if is_positive:
                seconds = list(positives[:n_pairs])
            else:
                seconds = [random_negative(label) for _ in range(n_pairs)]
        elif is_positive:
            seconds = [positives[np.random.randint(0, len(positives))]]
        else:
            seconds = [random_negative(label)]

        score = 1.0 if is_positive else 0.0
        records.extend(
            {"sentence1": first, "sentence2": second, "score": score}
            for second in seconds
        )

    return pd.DataFrame(records, columns=pair_columns)

In [7]:
# Seed NumPy's global RNG first: create_cos_sim_data draws its pairs with np.random,
# so without a fixed seed every run trains and evaluates on a different set of pairs.
np.random.seed(2012)  # same value as the random_state used for the train/test split
cosine_loss_dataset = create_cos_sim_data(df)

  0%|          | 0/13718 [00:00<?, ?it/s]

In [8]:
# Hold out a small evaluation slice of the generated pairs (fixed random_state for a repeatable split).
# NOTE(review): test_size=0.002 leaves only 28 of the 13718 pairs for evaluation (see the
# num_rows output below), and the split is over pairs, not over source texts, so texts in
# the evaluation pairs may also appear in training pairs. Confirm this is enough to trust
# the reported metrics.
train, test = train_test_split(cosine_loss_dataset, test_size=0.002, random_state=2012)
# Convert to Hugging Face datasets; preserve_index=False keeps the pandas index out of the columns.
train_dataset = datasets.Dataset.from_pandas(train, preserve_index=False)

test_dataset = datasets.Dataset.from_pandas(test, preserve_index=False)

In [9]:
# Inspect the training split: column names and number of pairs.
train_dataset

Dataset({
    features: ['sentence1', 'sentence2', 'score'],
    num_rows: 13690
})

In [10]:
# Training configuration.
model_name = "cointegrated/rubert-tiny2"  # pretrained checkpoint to fine-tune
max_seq_length = 512  # intended token limit per input; NOTE(review): confirm where this is applied to the model
num_epochs = 10  # passed to the training arguments as num_train_epochs
train_batch_size = 32  # used for both the train and the eval per-device batch size

- **ContrastiveLoss** expects two texts and a label of either 0 or 1. If the label is 1, the distance between the two embeddings is reduced; if the label is 0, the distance between the embeddings is increased.
- **CoSENTLoss** (Cosine Sentence loss) expects each InputExample to consist of a pair of texts and a float-valued label representing the expected similarity score of the pair.
- **CosineSimilarityLoss** expects each InputExample to consist of two texts and a float label. It computes the vectors `u = model(sentence_A)` and `v = model(sentence_B)` and measures the cosine similarity between the two. By default, it minimizes the following loss: `||input_label - cos_score_transformation(cosine_sim(u, v))||_2`.

In [11]:
# Load the pretrained encoder and cap its input length with the configured token limit;
# without this assignment max_seq_length would be defined but never take effect.
model = SentenceTransformer(model_name)
model.max_seq_length = max_seq_length

# Pair-wise objective: ContrastiveLoss takes two texts and a 0/1 label.
# Alternatives that accept the same (sentence1, sentence2, score) columns:
# loss = losses.CoSENTLoss(model)
# loss = losses.CosineSimilarityLoss(model)
loss = losses.ContrastiveLoss(model)



In [12]:
# Evaluate pair classification on the held-out pairs. Calling the evaluator here, before
# trainer.train(), records the pretrained model's baseline metrics; the same evaluator
# object is later passed to the trainer.
binary_acc_evaluator = BinaryClassificationEvaluator(
    sentences1=test_dataset["sentence1"],
    sentences2=test_dataset["sentence2"],
    labels=test_dataset["score"],  # 1.0 for same-label pairs, 0.0 otherwise
    name="cv",  # prefix of the reported metric keys (e.g. cv_cosine_accuracy)
)
results = binary_acc_evaluator(model)
results

  attn_output = torch.nn.functional.scaled_dot_product_attention(


{'cv_cosine_accuracy': 0.7142857142857143,
 'cv_cosine_accuracy_threshold': 0.6778384447097778,
 'cv_cosine_f1': 0.7999999999999999,
 'cv_cosine_f1_threshold': 0.6778384447097778,
 'cv_cosine_precision': 0.6956521739130435,
 'cv_cosine_recall': 0.9411764705882353,
 'cv_cosine_ap': 0.7655323961421687,
 'cv_dot_accuracy': 0.7142857142857143,
 'cv_dot_accuracy_threshold': 0.6778384447097778,
 'cv_dot_f1': 0.7999999999999999,
 'cv_dot_f1_threshold': 0.6778384447097778,
 'cv_dot_precision': 0.6956521739130435,
 'cv_dot_recall': 0.9411764705882353,
 'cv_dot_ap': 0.7655323961421687,
 'cv_manhattan_accuracy': 0.6785714285714286,
 'cv_manhattan_accuracy_threshold': 11.036173820495605,
 'cv_manhattan_f1': 0.7804878048780487,
 'cv_manhattan_f1_threshold': 11.298966407775879,
 'cv_manhattan_precision': 0.6666666666666666,
 'cv_manhattan_recall': 0.9411764705882353,
 'cv_manhattan_ap': 0.7596160535606904,
 'cv_euclidean_accuracy': 0.7142857142857143,
 'cv_euclidean_accuracy_threshold': 0.8026942014

In [13]:
# Directory handed to the training arguments and, at the end, to model.save_pretrained.
output_dir = 'tiny_sent_transformer_v2'

In [14]:
# Read the Hugging Face access token from a local config.json so it is not hard-coded in
# the notebook. Raises FileNotFoundError if the file is missing and KeyError if it has no
# "token" entry.
with open("config.json", "r") as f:
    json_config = json.load(f)
TOKEN = json_config["token"]

In [15]:
# Store the token via HfFolder so it can be read back with HfFolder.get_token()
# when the training arguments are built.
HfFolder.save_token(TOKEN)

In [16]:
# 5. Define the training arguments
# NOTE(review): recent transformers releases deprecate `evaluation_strategy` in favour of
# `eval_strategy`; check the keyword against the installed version.
args = SentenceTransformerTrainingArguments(
    # Required parameter:
    output_dir=output_dir,
    # Optional training parameters:
    num_train_epochs=num_epochs,
    per_device_train_batch_size=train_batch_size,
    per_device_eval_batch_size=train_batch_size,
    warmup_ratio=0.1,
    fp16=True,  # Set to False if you get an error that your GPU can't run on FP16
    bf16=False,  # Set to True if you have a GPU that supports BF16
    # Optional tracking/debugging parameters:
    evaluation_strategy="epoch",  # evaluate once per epoch
    save_strategy="no",  # do not write intermediate checkpoints
    hub_token=HfFolder.get_token(),  # token stored earlier with HfFolder.save_token
)



In [17]:
# 6. Create the trainer & start training
# Fine-tunes `model` on the training pairs with the chosen loss. eval_dataset yields the
# eval_loss value and the evaluator adds the eval_cv_* metrics seen in the logs below.
trainer = SentenceTransformerTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    loss=loss,
    evaluator=binary_acc_evaluator,
)
trainer.train()

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mkatya_shakhova[0m ([33mshakhova[0m). Use [1m`wandb login --relogin`[0m to force relogin


  0%|          | 0/4280 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 0.0056248256005346775, 'eval_cv_cosine_accuracy': 1.0, 'eval_cv_cosine_accuracy_threshold': 0.70265793800354, 'eval_cv_cosine_f1': 1.0, 'eval_cv_cosine_f1_threshold': 0.70265793800354, 'eval_cv_cosine_precision': 1.0, 'eval_cv_cosine_recall': 1.0, 'eval_cv_cosine_ap': 1.0, 'eval_cv_dot_accuracy': 1.0, 'eval_cv_dot_accuracy_threshold': 0.70265793800354, 'eval_cv_dot_f1': 1.0, 'eval_cv_dot_f1_threshold': 0.70265793800354, 'eval_cv_dot_precision': 1.0, 'eval_cv_dot_recall': 1.0, 'eval_cv_dot_ap': 1.0, 'eval_cv_manhattan_accuracy': 1.0, 'eval_cv_manhattan_accuracy_threshold': 10.755304336547852, 'eval_cv_manhattan_f1': 1.0, 'eval_cv_manhattan_f1_threshold': 10.755304336547852, 'eval_cv_manhattan_precision': 1.0, 'eval_cv_manhattan_recall': 1.0, 'eval_cv_manhattan_ap': 1.0, 'eval_cv_euclidean_accuracy': 1.0, 'eval_cv_euclidean_accuracy_threshold': 0.7711576223373413, 'eval_cv_euclidean_f1': 1.0, 'eval_cv_euclidean_f1_threshold': 0.7711576223373413, 'eval_cv_euclidean_precision

  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 0.0015162104973569512, 'eval_cv_cosine_accuracy': 1.0, 'eval_cv_cosine_accuracy_threshold': 0.7512955665588379, 'eval_cv_cosine_f1': 1.0, 'eval_cv_cosine_f1_threshold': 0.7512955665588379, 'eval_cv_cosine_precision': 1.0, 'eval_cv_cosine_recall': 1.0, 'eval_cv_cosine_ap': 1.0, 'eval_cv_dot_accuracy': 1.0, 'eval_cv_dot_accuracy_threshold': 0.7512956857681274, 'eval_cv_dot_f1': 1.0, 'eval_cv_dot_f1_threshold': 0.7512956857681274, 'eval_cv_dot_precision': 1.0, 'eval_cv_dot_recall': 1.0, 'eval_cv_dot_ap': 1.0, 'eval_cv_manhattan_accuracy': 1.0, 'eval_cv_manhattan_accuracy_threshold': 9.633075714111328, 'eval_cv_manhattan_f1': 1.0, 'eval_cv_manhattan_f1_threshold': 9.633075714111328, 'eval_cv_manhattan_precision': 1.0, 'eval_cv_manhattan_recall': 1.0, 'eval_cv_manhattan_ap': 1.0, 'eval_cv_euclidean_accuracy': 1.0, 'eval_cv_euclidean_accuracy_threshold': 0.6830845475196838, 'eval_cv_euclidean_f1': 1.0, 'eval_cv_euclidean_f1_threshold': 0.6830845475196838, 'eval_cv_euclidean_pre

  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 0.0011137289693579078, 'eval_cv_cosine_accuracy': 1.0, 'eval_cv_cosine_accuracy_threshold': 0.6815482378005981, 'eval_cv_cosine_f1': 1.0, 'eval_cv_cosine_f1_threshold': 0.6815482378005981, 'eval_cv_cosine_precision': 1.0, 'eval_cv_cosine_recall': 1.0, 'eval_cv_cosine_ap': 1.0, 'eval_cv_dot_accuracy': 1.0, 'eval_cv_dot_accuracy_threshold': 0.6815483570098877, 'eval_cv_dot_f1': 1.0, 'eval_cv_dot_f1_threshold': 0.6815483570098877, 'eval_cv_dot_precision': 1.0, 'eval_cv_dot_recall': 1.0, 'eval_cv_dot_ap': 1.0, 'eval_cv_manhattan_accuracy': 1.0, 'eval_cv_manhattan_accuracy_threshold': 10.668746948242188, 'eval_cv_manhattan_f1': 1.0, 'eval_cv_manhattan_f1_threshold': 10.668746948242188, 'eval_cv_manhattan_precision': 1.0, 'eval_cv_manhattan_recall': 1.0, 'eval_cv_manhattan_ap': 1.0, 'eval_cv_euclidean_accuracy': 1.0, 'eval_cv_euclidean_accuracy_threshold': 0.7667192220687866, 'eval_cv_euclidean_f1': 1.0, 'eval_cv_euclidean_f1_threshold': 0.7667192220687866, 'eval_cv_euclidean_p

  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 0.0007361729512922466, 'eval_cv_cosine_accuracy': 1.0, 'eval_cv_cosine_accuracy_threshold': 0.663303554058075, 'eval_cv_cosine_f1': 1.0, 'eval_cv_cosine_f1_threshold': 0.663303554058075, 'eval_cv_cosine_precision': 1.0, 'eval_cv_cosine_recall': 1.0, 'eval_cv_cosine_ap': 1.0, 'eval_cv_dot_accuracy': 1.0, 'eval_cv_dot_accuracy_threshold': 0.6633034944534302, 'eval_cv_dot_f1': 1.0, 'eval_cv_dot_f1_threshold': 0.6633034944534302, 'eval_cv_dot_precision': 1.0, 'eval_cv_dot_recall': 1.0, 'eval_cv_dot_ap': 1.0, 'eval_cv_manhattan_accuracy': 1.0, 'eval_cv_manhattan_accuracy_threshold': 10.777328491210938, 'eval_cv_manhattan_f1': 1.0, 'eval_cv_manhattan_f1_threshold': 10.777328491210938, 'eval_cv_manhattan_precision': 1.0, 'eval_cv_manhattan_recall': 1.0, 'eval_cv_manhattan_ap': 1.0, 'eval_cv_euclidean_accuracy': 1.0, 'eval_cv_euclidean_accuracy_threshold': 0.7874004244804382, 'eval_cv_euclidean_f1': 1.0, 'eval_cv_euclidean_f1_threshold': 0.7874004244804382, 'eval_cv_euclidean_pre

  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 0.0003793331270571798, 'eval_cv_cosine_accuracy': 1.0, 'eval_cv_cosine_accuracy_threshold': 0.7298336029052734, 'eval_cv_cosine_f1': 1.0, 'eval_cv_cosine_f1_threshold': 0.7298336029052734, 'eval_cv_cosine_precision': 1.0, 'eval_cv_cosine_recall': 1.0, 'eval_cv_cosine_ap': 1.0, 'eval_cv_dot_accuracy': 1.0, 'eval_cv_dot_accuracy_threshold': 0.7298336029052734, 'eval_cv_dot_f1': 1.0, 'eval_cv_dot_f1_threshold': 0.7298336029052734, 'eval_cv_dot_precision': 1.0, 'eval_cv_dot_recall': 1.0, 'eval_cv_dot_ap': 1.0, 'eval_cv_manhattan_accuracy': 1.0, 'eval_cv_manhattan_accuracy_threshold': 9.359941482543945, 'eval_cv_manhattan_f1': 1.0, 'eval_cv_manhattan_f1_threshold': 9.359941482543945, 'eval_cv_manhattan_precision': 1.0, 'eval_cv_manhattan_recall': 1.0, 'eval_cv_manhattan_ap': 1.0, 'eval_cv_euclidean_accuracy': 1.0, 'eval_cv_euclidean_accuracy_threshold': 0.6845762133598328, 'eval_cv_euclidean_f1': 1.0, 'eval_cv_euclidean_f1_threshold': 0.6845762133598328, 'eval_cv_euclidean_pre

  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 0.00019296162645332515, 'eval_cv_cosine_accuracy': 1.0, 'eval_cv_cosine_accuracy_threshold': 0.7211805582046509, 'eval_cv_cosine_f1': 1.0, 'eval_cv_cosine_f1_threshold': 0.7211805582046509, 'eval_cv_cosine_precision': 1.0, 'eval_cv_cosine_recall': 1.0, 'eval_cv_cosine_ap': 1.0, 'eval_cv_dot_accuracy': 1.0, 'eval_cv_dot_accuracy_threshold': 0.7211805582046509, 'eval_cv_dot_f1': 1.0, 'eval_cv_dot_f1_threshold': 0.7211805582046509, 'eval_cv_dot_precision': 1.0, 'eval_cv_dot_recall': 1.0, 'eval_cv_dot_ap': 1.0, 'eval_cv_manhattan_accuracy': 1.0, 'eval_cv_manhattan_accuracy_threshold': 9.254554748535156, 'eval_cv_manhattan_f1': 1.0, 'eval_cv_manhattan_f1_threshold': 9.254554748535156, 'eval_cv_manhattan_precision': 1.0, 'eval_cv_manhattan_recall': 1.0, 'eval_cv_manhattan_ap': 1.0, 'eval_cv_euclidean_accuracy': 1.0, 'eval_cv_euclidean_accuracy_threshold': 0.672174334526062, 'eval_cv_euclidean_f1': 1.0, 'eval_cv_euclidean_f1_threshold': 0.672174334526062, 'eval_cv_euclidean_prec

  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 0.00017789019329939038, 'eval_cv_cosine_accuracy': 1.0, 'eval_cv_cosine_accuracy_threshold': 0.707309901714325, 'eval_cv_cosine_f1': 1.0, 'eval_cv_cosine_f1_threshold': 0.707309901714325, 'eval_cv_cosine_precision': 1.0, 'eval_cv_cosine_recall': 1.0, 'eval_cv_cosine_ap': 1.0, 'eval_cv_dot_accuracy': 1.0, 'eval_cv_dot_accuracy_threshold': 0.707309901714325, 'eval_cv_dot_f1': 1.0, 'eval_cv_dot_f1_threshold': 0.707309901714325, 'eval_cv_dot_precision': 1.0, 'eval_cv_dot_recall': 1.0, 'eval_cv_dot_ap': 1.0, 'eval_cv_manhattan_accuracy': 1.0, 'eval_cv_manhattan_accuracy_threshold': 9.434028625488281, 'eval_cv_manhattan_f1': 1.0, 'eval_cv_manhattan_f1_threshold': 9.434028625488281, 'eval_cv_manhattan_precision': 1.0, 'eval_cv_manhattan_recall': 1.0, 'eval_cv_manhattan_ap': 1.0, 'eval_cv_euclidean_accuracy': 1.0, 'eval_cv_euclidean_accuracy_threshold': 0.6903454661369324, 'eval_cv_euclidean_f1': 1.0, 'eval_cv_euclidean_f1_threshold': 0.6903454661369324, 'eval_cv_euclidean_precis

  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 0.00014209300570655614, 'eval_cv_cosine_accuracy': 1.0, 'eval_cv_cosine_accuracy_threshold': 0.7282200455665588, 'eval_cv_cosine_f1': 1.0, 'eval_cv_cosine_f1_threshold': 0.7282200455665588, 'eval_cv_cosine_precision': 1.0, 'eval_cv_cosine_recall': 1.0, 'eval_cv_cosine_ap': 1.0, 'eval_cv_dot_accuracy': 1.0, 'eval_cv_dot_accuracy_threshold': 0.7282200455665588, 'eval_cv_dot_f1': 1.0, 'eval_cv_dot_f1_threshold': 0.7282200455665588, 'eval_cv_dot_precision': 1.0, 'eval_cv_dot_recall': 1.0, 'eval_cv_dot_ap': 1.0, 'eval_cv_manhattan_accuracy': 1.0, 'eval_cv_manhattan_accuracy_threshold': 9.142365455627441, 'eval_cv_manhattan_f1': 1.0, 'eval_cv_manhattan_f1_threshold': 9.142365455627441, 'eval_cv_manhattan_precision': 1.0, 'eval_cv_manhattan_recall': 1.0, 'eval_cv_manhattan_ap': 1.0, 'eval_cv_euclidean_accuracy': 1.0, 'eval_cv_euclidean_accuracy_threshold': 0.6609802842140198, 'eval_cv_euclidean_f1': 1.0, 'eval_cv_euclidean_f1_threshold': 0.6609802842140198, 'eval_cv_euclidean_pr

  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 8.968743350123987e-05, 'eval_cv_cosine_accuracy': 1.0, 'eval_cv_cosine_accuracy_threshold': 0.7283456325531006, 'eval_cv_cosine_f1': 1.0, 'eval_cv_cosine_f1_threshold': 0.7283456325531006, 'eval_cv_cosine_precision': 1.0, 'eval_cv_cosine_recall': 1.0, 'eval_cv_cosine_ap': 1.0, 'eval_cv_dot_accuracy': 1.0, 'eval_cv_dot_accuracy_threshold': 0.7283456325531006, 'eval_cv_dot_f1': 1.0, 'eval_cv_dot_f1_threshold': 0.7283456325531006, 'eval_cv_dot_precision': 1.0, 'eval_cv_dot_recall': 1.0, 'eval_cv_dot_ap': 1.0, 'eval_cv_manhattan_accuracy': 1.0, 'eval_cv_manhattan_accuracy_threshold': 8.987017631530762, 'eval_cv_manhattan_f1': 1.0, 'eval_cv_manhattan_f1_threshold': 8.987017631530762, 'eval_cv_manhattan_precision': 1.0, 'eval_cv_manhattan_recall': 1.0, 'eval_cv_manhattan_ap': 1.0, 'eval_cv_euclidean_accuracy': 1.0, 'eval_cv_euclidean_accuracy_threshold': 0.6474648714065552, 'eval_cv_euclidean_f1': 1.0, 'eval_cv_euclidean_f1_threshold': 0.6474648714065552, 'eval_cv_euclidean_pre

  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 8.795221947366372e-05, 'eval_cv_cosine_accuracy': 1.0, 'eval_cv_cosine_accuracy_threshold': 0.7240798473358154, 'eval_cv_cosine_f1': 1.0, 'eval_cv_cosine_f1_threshold': 0.7240798473358154, 'eval_cv_cosine_precision': 1.0, 'eval_cv_cosine_recall': 1.0, 'eval_cv_cosine_ap': 1.0, 'eval_cv_dot_accuracy': 1.0, 'eval_cv_dot_accuracy_threshold': 0.7240797877311707, 'eval_cv_dot_f1': 1.0, 'eval_cv_dot_f1_threshold': 0.7240797877311707, 'eval_cv_dot_precision': 1.0, 'eval_cv_dot_recall': 1.0, 'eval_cv_dot_ap': 1.0, 'eval_cv_manhattan_accuracy': 1.0, 'eval_cv_manhattan_accuracy_threshold': 9.055404663085938, 'eval_cv_manhattan_f1': 1.0, 'eval_cv_manhattan_f1_threshold': 9.055404663085938, 'eval_cv_manhattan_precision': 1.0, 'eval_cv_manhattan_recall': 1.0, 'eval_cv_manhattan_ap': 1.0, 'eval_cv_euclidean_accuracy': 1.0, 'eval_cv_euclidean_accuracy_threshold': 0.6519391536712646, 'eval_cv_euclidean_f1': 1.0, 'eval_cv_euclidean_f1_threshold': 0.6519391536712646, 'eval_cv_euclidean_pre

TrainOutput(global_step=4280, training_loss=0.0017390777713784547, metrics={'train_runtime': 113.3171, 'train_samples_per_second': 1208.114, 'train_steps_per_second': 37.77, 'total_flos': 0.0, 'train_loss': 0.0017390777713784547, 'epoch': 10.0})

In [18]:
# Generate the model card for the trained model before uploading it.
trainer.create_model_card()

Computing widget examples:   0%|          | 0/1 [00:00<?, ?example/s]

In [19]:
# Upload the trained model (and training_args.bin) to the Hugging Face Hub.
trainer.push_to_hub()

model.safetensors:   0%|          | 0.00/117M [00:00<?, ?B/s]

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

training_args.bin:   0%|          | 0.00/5.50k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/Shakhovak/tiny_sent_transformer_v2/commit/a9e10b476da76b9c5ffd678d4468f8d99b794c13', commit_message='End of training', commit_description='', oid='a9e10b476da76b9c5ffd678d4468f8d99b794c13', pr_url=None, pr_revision=None, pr_num=None)

In [20]:
# Also keep a local copy of the fine-tuned model in output_dir.
model.save_pretrained(output_dir)