In [None]:
import pandas as pd
from torch.utils.data import DataLoader, Dataset
from sentence_transformers import SentenceTransformer, InputExample, losses, util
import torch
from tqdm import tqdm
import math

# Custom Dataset
class CustomDataset(Dataset):
    def __init__(self, dataframe):
        self.data = dataframe

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]
        texts = (row['question'], row['context'])
        return texts

# Load the dataset
csv_path = 'fine_tuning_dataset/all_merged_dataset.csv'
df = pd.read_csv(csv_path)

# Create a dataset and a DataLoader
train_dataset = CustomDataset(df)
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=16)

# Load the pre-trained model
model = SentenceTransformer('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2')

# Use MultipleNegativesRankingLoss for training
train_loss = losses.MultipleNegativesRankingLoss(model)

# Fine-tune the model
num_epochs = 5
warmup_steps = math.ceil(len(train_dataloader) * num_epochs * 0.1)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)

# To store train losses
train_losses = []

model.train()
for epoch in range(num_epochs):
    epoch_loss = 0
    for batch in tqdm(train_dataloader, desc=f"Training Epoch {epoch+1}"):
        optimizer.zero_grad()
        
        # Tokenize the batch of text pairs
        texts = [example for example in batch]
        batch_features = model.tokenize(texts)
        
        # Move input tensors to the GPU
        batch_features = {key: val.to(device) for key, val in batch_features.items()}
        
        # Forward pass
        model_output = model(batch_features)
        
        # Compute loss
        loss_value = train_loss(model_output['sentence_embedding'])
        
        # Backward pass and optimization
        loss_value.backward()
        optimizer.step()
        
        # Accumulate loss value
        epoch_loss += loss_value.item()
    
    # Compute average loss for the epoch
    avg_loss = epoch_loss / len(train_dataloader)
    train_losses.append(avg_loss)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss}")

print("Model fine-tuning complete. Model saved to `output_path_to_save_model`.")
print("Training losses over epochs: ", train_losses)
model.save('output_path_to_save_model')

In [None]:
# learning rate finder
import pandas as pd
import numpy as np
from torch.utils.data import DataLoader
from sentence_transformers import SentenceTransformer, InputExample, losses, util
import torch
import torch.optim as optim
import math
from sklearn.model_selection import train_test_split

# Load the dataset
csv_path = 'fine_tuning_dataset/all_merged_dataset.csv'
df = pd.read_csv(csv_path)

# Split the dataset into train and test sets
train_df, test_df = train_test_split(df, test_size=0.1, random_state=42)

# Create InputExamples for training
train_examples = [InputExample(texts=[row['question'], row['context']]) for _, row in train_df.iterrows()]

# Create a DataLoader
batch_size = 16
train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=batch_size)

# Load the pre-trained model
model = SentenceTransformer('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2')

# Use MultipleNegativesRankingLoss for training
train_loss = losses.MultipleNegativesRankingLoss(model)

# Fine-tune the model
num_epochs = 10  # Example: Start with fewer epochs
initial_lr = 0.00002
warmup_steps = math.ceil(len(train_dataloader) * num_epochs * 0.1)
questions = test_df['question'].values
contexts = test_df['context'].values

optimizer = optim.AdamW(model.parameters(), lr=initial_lr)

# Assuming you want to use a custom training loop for better control
for epoch in range(num_epochs):
    print(f"Epoch {epoch + 1}/{num_epochs}")
    
    model.zero_grad()
    for step, batch in enumerate(train_dataloader):
        inputs = batch
        loss = train_loss(inputs)
        loss.backward()
        optimizer.step()

    # Here you would typically evaluate the performance on a validation set
    # model_test = SentenceTransformer(output_path, device='cpu')

    # with torch.no_grad():
    #     question_embeddings = model.encode(questions, convert_to_tensor=True)
    #     context_embeddings = model.encode(contexts, convert_to_tensor=True)

    #     # Perform element-wise multiplication for each question-context pair
    #     combined_embeddings = torch.mul(question_embeddings, context_embeddings)

    #     # Calculate cosine similarity matrix for the combined embeddings
    #     similarity_matrix = util.cos_sim(combined_embeddings, combined_embeddings).cpu().numpy()

    # # Extract the upper triangular part of the similarity matrix, excluding the diagonal
    # triu_indices = np.triu_indices_from(similarity_matrix, k=1)
    # triu_values = similarity_matrix[triu_indices]
    # average_similarity_score = np.mean(triu_values)

    # print("\nAverage Similarity Score (excluding diagonal):")
    # print(average_similarity_score)
    # print("\n" + "="*50 + "\n")

    # torch.cuda.empty_cache()
    # torch.cuda.ipc_collect()
    # del model_test
    torch.cuda.empty_cache()

print("Training Complete.")

In [1]:
# learning rate
import pandas as pd
import numpy as np
from torch.utils.data import DataLoader
from sentence_transformers import SentenceTransformer, InputExample, losses, util
import math
from sklearn.model_selection import train_test_split
import torch
import torch.optim as optim

# Load the dataset
csv_path = 'fine_tuning_dataset/all_merged_dataset.csv'
df = pd.read_csv(csv_path)

# Split the dataset into train and test sets
train_df, test_df = train_test_split(df, test_size=0.1, random_state=42)

# Create InputExamples for training
train_examples = [InputExample(texts=[row['question'], row['context']]) for _, row in train_df.iterrows()]

# Create a DataLoader
# default batch_size=16
train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=8)

# Load the pre-trained model
model = SentenceTransformer('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2')

# Use MultipleNegativesRankingLoss for training
train_loss = losses.MultipleNegativesRankingLoss(model)

# Fine-tune the model
num_epochs = 300
warmup_steps = math.ceil(len(train_dataloader) * num_epochs * 0.1)
questions = test_df['question'].values
contexts = test_df['context'].values

assert len(questions) == len(contexts), "The number of questions must match the number of contexts"

# Define the optimizer
# optimizer = optim.AdamW(model.parameters(), lr=0.00002)  # Initial learning rate
# Scheduler parameters can be defined in the fit method

# normal also as default: 2e-5
learning_rates = [2e-5, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2]

lr_picked = learning_rates[0]
output_path = f'output_path_to_save_model-{lr_picked}'
for i in range(num_epochs):
    print(f"fine tune epoch {i+1}")
    model.fit(
        train_objectives=[(train_dataloader, train_loss)],
        epochs=1,
        warmup_steps=warmup_steps,
        output_path=output_path,
        # optimizer_class=optim.AdamW,
        # optimizer_params={'lr': lr_picked},
        scheduler='WarmupLinear'  # Use a supported scheduler
    )
    model_test = SentenceTransformer(output_path, device='cpu')

    with torch.no_grad():
        question_embeddings = model.encode(questions, convert_to_tensor=True)
        context_embeddings = model.encode(contexts, convert_to_tensor=True)

        # Perform element-wise multiplication for each question-context pair
        combined_embeddings = torch.mul(question_embeddings, context_embeddings)

        # Calculate cosine similarity matrix for the combined embeddings
        similarity_matrix = util.cos_sim(combined_embeddings, combined_embeddings).cpu().numpy()

    # Extract the upper triangular part of the similarity matrix, excluding the diagonal
    triu_indices = np.triu_indices_from(similarity_matrix, k=1)
    triu_values = similarity_matrix[triu_indices]
    average_similarity_score = np.mean(triu_values)

    print("\nAverage Similarity Score (excluding diagonal):")
    print(average_similarity_score)
    print("\n" + "="*50 + "\n")

    torch.cuda.empty_cache()
    torch.cuda.ipc_collect()
    del model_test


print("Model fine-tuning complete. Model saved to `output_path_to_save_model`.")

  from tqdm.autonotebook import tqdm, trange


fine tune epoch 1


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:18<00:00,  4.31it/s]


{'train_runtime': 18.5636, 'train_samples_per_second': 34.099, 'train_steps_per_second': 4.31, 'train_loss': 0.580385971069336, 'epoch': 1.0}


                                                                     


Average Similarity Score (excluding diagonal):
0.29116905


fine tune epoch 2


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:19<00:00,  4.03it/s]


{'train_runtime': 19.8304, 'train_samples_per_second': 31.921, 'train_steps_per_second': 4.034, 'train_loss': 0.5490245342254638, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.2879786


fine tune epoch 3


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:26<00:00,  3.04it/s]


{'train_runtime': 26.3241, 'train_samples_per_second': 24.046, 'train_steps_per_second': 3.039, 'train_loss': 0.5018077373504639, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.2844635


fine tune epoch 4


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:20<00:00,  3.93it/s]


{'train_runtime': 20.3635, 'train_samples_per_second': 31.085, 'train_steps_per_second': 3.929, 'train_loss': 0.4581899642944336, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.28067744


fine tune epoch 5


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:20<00:00,  3.84it/s]


{'train_runtime': 20.8428, 'train_samples_per_second': 30.37, 'train_steps_per_second': 3.838, 'train_loss': 0.4180014610290527, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.27669156


fine tune epoch 6


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:21<00:00,  3.79it/s]


{'train_runtime': 21.0883, 'train_samples_per_second': 30.017, 'train_steps_per_second': 3.794, 'train_loss': 0.38104610443115233, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.27256846


fine tune epoch 7


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:20<00:00,  3.91it/s]


{'train_runtime': 20.4797, 'train_samples_per_second': 30.909, 'train_steps_per_second': 3.906, 'train_loss': 0.347184157371521, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.26839164


fine tune epoch 8


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:20<00:00,  3.95it/s]


{'train_runtime': 20.2726, 'train_samples_per_second': 31.224, 'train_steps_per_second': 3.946, 'train_loss': 0.31628386974334716, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.26422504


fine tune epoch 9


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:20<00:00,  3.94it/s]


{'train_runtime': 20.3088, 'train_samples_per_second': 31.169, 'train_steps_per_second': 3.939, 'train_loss': 0.2881730079650879, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.26010466


fine tune epoch 10


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:20<00:00,  3.91it/s]


{'train_runtime': 20.451, 'train_samples_per_second': 30.952, 'train_steps_per_second': 3.912, 'train_loss': 0.26264545917510984, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.2560779


fine tune epoch 11


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:20<00:00,  3.87it/s]


{'train_runtime': 20.6631, 'train_samples_per_second': 30.634, 'train_steps_per_second': 3.872, 'train_loss': 0.2394890546798706, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.25218356


fine tune epoch 12


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:24<00:00,  3.24it/s]


{'train_runtime': 24.6753, 'train_samples_per_second': 25.653, 'train_steps_per_second': 3.242, 'train_loss': 0.21849627494812013, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.24842176


fine tune epoch 13


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:24<00:00,  3.23it/s]


{'train_runtime': 24.7484, 'train_samples_per_second': 25.577, 'train_steps_per_second': 3.233, 'train_loss': 0.19947140216827391, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.24479978


fine tune epoch 14


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:24<00:00,  3.23it/s]


{'train_runtime': 24.7299, 'train_samples_per_second': 25.597, 'train_steps_per_second': 3.235, 'train_loss': 0.18225961923599243, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.24134411


fine tune epoch 15


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:32<00:00,  2.45it/s]


{'train_runtime': 32.67, 'train_samples_per_second': 19.376, 'train_steps_per_second': 2.449, 'train_loss': 0.16668322086334228, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.23806117


fine tune epoch 16


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:26<00:00,  3.01it/s]


{'train_runtime': 26.5588, 'train_samples_per_second': 23.834, 'train_steps_per_second': 3.012, 'train_loss': 0.15257368087768555, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.23498982


fine tune epoch 17


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:30<00:00,  2.62it/s]


{'train_runtime': 30.4768, 'train_samples_per_second': 20.77, 'train_steps_per_second': 2.625, 'train_loss': 0.13975517749786376, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.23216157


fine tune epoch 18


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:20<00:00,  3.87it/s]


{'train_runtime': 20.6662, 'train_samples_per_second': 30.63, 'train_steps_per_second': 3.871, 'train_loss': 0.12808566093444823, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.22956747


fine tune epoch 19


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:20<00:00,  3.88it/s]


{'train_runtime': 20.6204, 'train_samples_per_second': 30.698, 'train_steps_per_second': 3.88, 'train_loss': 0.1174815058708191, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.22720975


fine tune epoch 20


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:20<00:00,  3.92it/s]


{'train_runtime': 20.4005, 'train_samples_per_second': 31.029, 'train_steps_per_second': 3.921, 'train_loss': 0.10782601833343505, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.22508469


fine tune epoch 21


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:19<00:00,  4.00it/s]


{'train_runtime': 19.9861, 'train_samples_per_second': 31.672, 'train_steps_per_second': 4.003, 'train_loss': 0.09901057481765747, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.22315945


fine tune epoch 22


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:19<00:00,  4.01it/s]


{'train_runtime': 19.9723, 'train_samples_per_second': 31.694, 'train_steps_per_second': 4.006, 'train_loss': 0.09096415042877197, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.22141232


fine tune epoch 23


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:20<00:00,  3.96it/s]


{'train_runtime': 20.2184, 'train_samples_per_second': 31.308, 'train_steps_per_second': 3.957, 'train_loss': 0.08361579775810242, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.21981676


fine tune epoch 24


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:20<00:00,  3.98it/s]


{'train_runtime': 20.0842, 'train_samples_per_second': 31.517, 'train_steps_per_second': 3.983, 'train_loss': 0.07690854668617249, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.21836542


fine tune epoch 25


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:20<00:00,  3.99it/s]


{'train_runtime': 20.0334, 'train_samples_per_second': 31.597, 'train_steps_per_second': 3.993, 'train_loss': 0.07077466249465943, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.21707344


fine tune epoch 26


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:19<00:00,  4.01it/s]


{'train_runtime': 19.9689, 'train_samples_per_second': 31.699, 'train_steps_per_second': 4.006, 'train_loss': 0.06515035629272461, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.21591355


fine tune epoch 27


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:20<00:00,  3.99it/s]


{'train_runtime': 20.052, 'train_samples_per_second': 31.568, 'train_steps_per_second': 3.99, 'train_loss': 0.05999884009361267, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.21487573


fine tune epoch 28


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:20<00:00,  3.95it/s]


{'train_runtime': 20.2706, 'train_samples_per_second': 31.228, 'train_steps_per_second': 3.947, 'train_loss': 0.055286914110183716, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.21393433


fine tune epoch 29


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:21<00:00,  3.81it/s]


{'train_runtime': 21.017, 'train_samples_per_second': 30.118, 'train_steps_per_second': 3.806, 'train_loss': 0.05097367763519287, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.21308176


fine tune epoch 30


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:32<00:00,  2.48it/s]


{'train_runtime': 32.2332, 'train_samples_per_second': 19.638, 'train_steps_per_second': 2.482, 'train_loss': 0.047019317746162415, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.21231048


fine tune epoch 31


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:27<00:00,  2.86it/s]


{'train_runtime': 27.942, 'train_samples_per_second': 22.654, 'train_steps_per_second': 2.863, 'train_loss': 0.04340418577194214, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.21161358


fine tune epoch 32


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:30<00:00,  2.64it/s]


{'train_runtime': 30.3269, 'train_samples_per_second': 20.873, 'train_steps_per_second': 2.638, 'train_loss': 0.04010174572467804, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.21097986


fine tune epoch 33


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:20<00:00,  3.83it/s]


{'train_runtime': 20.8586, 'train_samples_per_second': 30.347, 'train_steps_per_second': 3.835, 'train_loss': 0.037088826298713684, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.21040867


fine tune epoch 34


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:20<00:00,  3.83it/s]


{'train_runtime': 20.8975, 'train_samples_per_second': 30.291, 'train_steps_per_second': 3.828, 'train_loss': 0.03434208631515503, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.209913


fine tune epoch 35


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:20<00:00,  3.91it/s]


{'train_runtime': 20.4322, 'train_samples_per_second': 30.981, 'train_steps_per_second': 3.915, 'train_loss': 0.031835371255874635, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.2094824


fine tune epoch 36


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:20<00:00,  3.99it/s]


{'train_runtime': 20.0496, 'train_samples_per_second': 31.572, 'train_steps_per_second': 3.99, 'train_loss': 0.029541334509849547, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.20910488


fine tune epoch 37


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:20<00:00,  3.97it/s]


{'train_runtime': 20.1344, 'train_samples_per_second': 31.439, 'train_steps_per_second': 3.973, 'train_loss': 0.02742110788822174, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.20876318


fine tune epoch 38


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:20<00:00,  3.95it/s]


{'train_runtime': 20.2592, 'train_samples_per_second': 31.245, 'train_steps_per_second': 3.949, 'train_loss': 0.025452327728271485, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.2084557


fine tune epoch 39


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:29<00:00,  2.72it/s]


{'train_runtime': 29.3789, 'train_samples_per_second': 21.546, 'train_steps_per_second': 2.723, 'train_loss': 0.02361961305141449, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.20818603


fine tune epoch 40


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:30<00:00,  2.64it/s]


{'train_runtime': 30.3053, 'train_samples_per_second': 20.887, 'train_steps_per_second': 2.64, 'train_loss': 0.021910658478736876, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.20794916


fine tune epoch 41


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:26<00:00,  2.98it/s]


{'train_runtime': 26.871, 'train_samples_per_second': 23.557, 'train_steps_per_second': 2.977, 'train_loss': 0.020315682888031004, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.2077408


fine tune epoch 42


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:14<00:00,  5.35it/s]


{'train_runtime': 14.9593, 'train_samples_per_second': 42.315, 'train_steps_per_second': 5.348, 'train_loss': 0.018828046321868897, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.20755853


fine tune epoch 43


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:15<00:00,  5.33it/s]


{'train_runtime': 15.0212, 'train_samples_per_second': 42.14, 'train_steps_per_second': 5.326, 'train_loss': 0.017437314987182616, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.20740041


fine tune epoch 44


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:15<00:00,  5.31it/s]


{'train_runtime': 15.0673, 'train_samples_per_second': 42.011, 'train_steps_per_second': 5.309, 'train_loss': 0.016133125126361846, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.2072677


fine tune epoch 45


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:15<00:00,  5.26it/s]


{'train_runtime': 15.221, 'train_samples_per_second': 41.587, 'train_steps_per_second': 5.256, 'train_loss': 0.014916865527629853, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.20715956


fine tune epoch 46


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:15<00:00,  5.20it/s]


{'train_runtime': 15.3799, 'train_samples_per_second': 41.158, 'train_steps_per_second': 5.202, 'train_loss': 0.013782960176467896, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.20708092


fine tune epoch 47


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:15<00:00,  5.16it/s]


{'train_runtime': 15.4896, 'train_samples_per_second': 40.866, 'train_steps_per_second': 5.165, 'train_loss': 0.012726321816444397, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.20703822


fine tune epoch 48


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:15<00:00,  5.13it/s]


{'train_runtime': 15.589, 'train_samples_per_second': 40.605, 'train_steps_per_second': 5.132, 'train_loss': 0.011737558990716934, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.20702857


fine tune epoch 49


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:17<00:00,  4.67it/s]


{'train_runtime': 17.1223, 'train_samples_per_second': 36.969, 'train_steps_per_second': 4.672, 'train_loss': 0.010814595222473144, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.20704852


fine tune epoch 50


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:15<00:00,  5.07it/s]


{'train_runtime': 15.764, 'train_samples_per_second': 40.155, 'train_steps_per_second': 5.075, 'train_loss': 0.009956040233373643, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.207094


fine tune epoch 51


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:15<00:00,  5.08it/s]


{'train_runtime': 15.7576, 'train_samples_per_second': 40.171, 'train_steps_per_second': 5.077, 'train_loss': 0.009160185605287552, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.20716098


fine tune epoch 52


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:15<00:00,  5.08it/s]


{'train_runtime': 15.7408, 'train_samples_per_second': 40.214, 'train_steps_per_second': 5.082, 'train_loss': 0.008424831926822663, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.20724587


fine tune epoch 53


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:15<00:00,  5.05it/s]


{'train_runtime': 15.8479, 'train_samples_per_second': 39.942, 'train_steps_per_second': 5.048, 'train_loss': 0.007747098058462143, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.20734563


fine tune epoch 54


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:16<00:00,  4.99it/s]


{'train_runtime': 16.0328, 'train_samples_per_second': 39.482, 'train_steps_per_second': 4.99, 'train_loss': 0.007121572643518448, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.20745741


fine tune epoch 55


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:16<00:00,  5.00it/s]


{'train_runtime': 16.007, 'train_samples_per_second': 39.545, 'train_steps_per_second': 4.998, 'train_loss': 0.006544971466064453, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.20757796


fine tune epoch 56


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:16<00:00,  4.99it/s]


{'train_runtime': 16.0343, 'train_samples_per_second': 39.478, 'train_steps_per_second': 4.989, 'train_loss': 0.006012341752648353, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.20770551


fine tune epoch 57


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:15<00:00,  5.02it/s]


{'train_runtime': 15.9356, 'train_samples_per_second': 39.722, 'train_steps_per_second': 5.02, 'train_loss': 0.005522413551807404, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.20783865


fine tune epoch 58


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:16<00:00,  4.99it/s]


{'train_runtime': 16.0369, 'train_samples_per_second': 39.472, 'train_steps_per_second': 4.989, 'train_loss': 0.005073260888457299, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.20797667


fine tune epoch 59


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:17<00:00,  4.54it/s]


{'train_runtime': 17.6302, 'train_samples_per_second': 35.904, 'train_steps_per_second': 4.538, 'train_loss': 0.004662305116653442, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.20811984


fine tune epoch 60


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:16<00:00,  4.91it/s]


{'train_runtime': 16.2974, 'train_samples_per_second': 38.841, 'train_steps_per_second': 4.909, 'train_loss': 0.004286695271730423, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.20826729


fine tune epoch 61


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:16<00:00,  4.95it/s]


{'train_runtime': 16.1658, 'train_samples_per_second': 39.157, 'train_steps_per_second': 4.949, 'train_loss': 0.003943866863846779, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.20841834


fine tune epoch 62


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:27<00:00,  2.88it/s]


{'train_runtime': 27.7786, 'train_samples_per_second': 22.787, 'train_steps_per_second': 2.88, 'train_loss': 0.0036312837153673174, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.20857228


fine tune epoch 63


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:24<00:00,  3.31it/s]


{'train_runtime': 24.1545, 'train_samples_per_second': 26.206, 'train_steps_per_second': 3.312, 'train_loss': 0.0033464621752500533, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.20872833


fine tune epoch 64


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:29<00:00,  2.67it/s]


{'train_runtime': 29.9446, 'train_samples_per_second': 21.139, 'train_steps_per_second': 2.672, 'train_loss': 0.0030865291133522986, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.20888576


fine tune epoch 65


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:24<00:00,  3.21it/s]


{'train_runtime': 24.9199, 'train_samples_per_second': 25.401, 'train_steps_per_second': 3.21, 'train_loss': 0.0028494128957390783, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.2090442


fine tune epoch 66


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:20<00:00,  3.82it/s]


{'train_runtime': 20.939, 'train_samples_per_second': 30.231, 'train_steps_per_second': 3.821, 'train_loss': 0.0026331815868616106, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.20920321


fine tune epoch 67


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:20<00:00,  3.82it/s]


{'train_runtime': 20.9179, 'train_samples_per_second': 30.261, 'train_steps_per_second': 3.824, 'train_loss': 0.0024360278621315956, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.2093624


fine tune epoch 68


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:20<00:00,  3.88it/s]


{'train_runtime': 20.6239, 'train_samples_per_second': 30.693, 'train_steps_per_second': 3.879, 'train_loss': 0.0022562704980373383, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.20952168


fine tune epoch 69


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:20<00:00,  3.92it/s]


{'train_runtime': 20.3921, 'train_samples_per_second': 31.041, 'train_steps_per_second': 3.923, 'train_loss': 0.0020921966060996056, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.20968261


fine tune epoch 70


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:20<00:00,  3.90it/s]


{'train_runtime': 20.5028, 'train_samples_per_second': 30.874, 'train_steps_per_second': 3.902, 'train_loss': 0.0019413016736507416, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.20984477


fine tune epoch 71


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:20<00:00,  3.89it/s]


{'train_runtime': 20.5445, 'train_samples_per_second': 30.811, 'train_steps_per_second': 3.894, 'train_loss': 0.0018026933073997497, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.21000859


fine tune epoch 72


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:20<00:00,  3.94it/s]


{'train_runtime': 20.2787, 'train_samples_per_second': 31.215, 'train_steps_per_second': 3.945, 'train_loss': 0.0016750365495681763, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.21017364


fine tune epoch 73


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:20<00:00,  3.96it/s]


{'train_runtime': 20.1997, 'train_samples_per_second': 31.337, 'train_steps_per_second': 3.96, 'train_loss': 0.001557669136673212, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.21033958


fine tune epoch 74


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:20<00:00,  3.87it/s]


{'train_runtime': 20.6532, 'train_samples_per_second': 30.649, 'train_steps_per_second': 3.873, 'train_loss': 0.001450132019817829, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.2105065


fine tune epoch 75


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:21<00:00,  3.80it/s]


{'train_runtime': 21.0458, 'train_samples_per_second': 30.077, 'train_steps_per_second': 3.801, 'train_loss': 0.0013522718101739884, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.21067593


fine tune epoch 76


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:20<00:00,  3.87it/s]


{'train_runtime': 20.6706, 'train_samples_per_second': 30.623, 'train_steps_per_second': 3.87, 'train_loss': 0.0012625301256775855, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.2108473


fine tune epoch 77


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:20<00:00,  3.97it/s]


{'train_runtime': 20.1329, 'train_samples_per_second': 31.441, 'train_steps_per_second': 3.974, 'train_loss': 0.0011801950633525849, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.21102001


fine tune epoch 78


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:20<00:00,  3.95it/s]


{'train_runtime': 20.2464, 'train_samples_per_second': 31.265, 'train_steps_per_second': 3.951, 'train_loss': 0.0011046251282095909, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.21119353


fine tune epoch 79


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:20<00:00,  3.95it/s]


{'train_runtime': 20.2643, 'train_samples_per_second': 31.237, 'train_steps_per_second': 3.948, 'train_loss': 0.0010352456010878086, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.21136743


fine tune epoch 80


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:20<00:00,  3.83it/s]


{'train_runtime': 20.8666, 'train_samples_per_second': 30.336, 'train_steps_per_second': 3.834, 'train_loss': 0.0009715406224131585, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.21154118


fine tune epoch 81


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:15<00:00,  5.07it/s]


{'train_runtime': 15.7866, 'train_samples_per_second': 40.097, 'train_steps_per_second': 5.068, 'train_loss': 0.0009130340069532395, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.21171536


fine tune epoch 82


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:15<00:00,  5.29it/s]


{'train_runtime': 15.1269, 'train_samples_per_second': 41.846, 'train_steps_per_second': 5.289, 'train_loss': 0.0008592107333242893, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.21189432


fine tune epoch 83


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:16<00:00,  4.78it/s]


{'train_runtime': 16.749, 'train_samples_per_second': 37.793, 'train_steps_per_second': 4.776, 'train_loss': 0.0008093411102890969, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.2120775


fine tune epoch 84


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:16<00:00,  4.78it/s]


{'train_runtime': 16.7304, 'train_samples_per_second': 37.835, 'train_steps_per_second': 4.782, 'train_loss': 0.0007631441578269005, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.21226853


fine tune epoch 85


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:17<00:00,  4.69it/s]


{'train_runtime': 17.0661, 'train_samples_per_second': 37.091, 'train_steps_per_second': 4.688, 'train_loss': 0.0007200403138995171, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.21246925


fine tune epoch 86


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:18<00:00,  4.33it/s]


{'train_runtime': 18.4604, 'train_samples_per_second': 34.29, 'train_steps_per_second': 4.334, 'train_loss': 0.0006796568632125854, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.21267945


fine tune epoch 87


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:15<00:00,  5.15it/s]


{'train_runtime': 15.5413, 'train_samples_per_second': 40.73, 'train_steps_per_second': 5.148, 'train_loss': 0.0006417797878384591, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.21289876


fine tune epoch 88


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:15<00:00,  5.06it/s]


{'train_runtime': 15.7985, 'train_samples_per_second': 40.067, 'train_steps_per_second': 5.064, 'train_loss': 0.0006062077358365059, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.21312691


fine tune epoch 89


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:20<00:00,  3.98it/s]


{'train_runtime': 20.1188, 'train_samples_per_second': 31.463, 'train_steps_per_second': 3.976, 'train_loss': 0.0005727682262659072, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.21336356


fine tune epoch 90


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:15<00:00,  5.09it/s]


{'train_runtime': 15.7317, 'train_samples_per_second': 40.237, 'train_steps_per_second': 5.085, 'train_loss': 0.0005413095466792583, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.21360826


fine tune epoch 91


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:17<00:00,  4.64it/s]


{'train_runtime': 17.2418, 'train_samples_per_second': 36.713, 'train_steps_per_second': 4.64, 'train_loss': 0.0005116930697113276, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.21386075


fine tune epoch 92


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:20<00:00,  3.98it/s]


{'train_runtime': 20.0941, 'train_samples_per_second': 31.502, 'train_steps_per_second': 3.981, 'train_loss': 0.00048379292711615564, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.21412057


fine tune epoch 93


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:18<00:00,  4.30it/s]


{'train_runtime': 18.5942, 'train_samples_per_second': 34.043, 'train_steps_per_second': 4.302, 'train_loss': 0.0004574938677251339, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.21438748


fine tune epoch 94


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:19<00:00,  4.01it/s]


{'train_runtime': 19.9538, 'train_samples_per_second': 31.723, 'train_steps_per_second': 4.009, 'train_loss': 0.0004326936788856983, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.21466096


fine tune epoch 95


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:27<00:00,  2.87it/s]


{'train_runtime': 27.8307, 'train_samples_per_second': 22.745, 'train_steps_per_second': 2.875, 'train_loss': 0.0004093003459274769, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.21494085


fine tune epoch 96


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:20<00:00,  3.84it/s]


{'train_runtime': 20.8489, 'train_samples_per_second': 30.361, 'train_steps_per_second': 3.837, 'train_loss': 0.0003872260684147477, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.21522659


fine tune epoch 97


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:35<00:00,  2.24it/s]


{'train_runtime': 35.7563, 'train_samples_per_second': 17.703, 'train_steps_per_second': 2.237, 'train_loss': 0.0003663917537778616, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.2155179


fine tune epoch 98


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:38<00:00,  2.06it/s]


{'train_runtime': 38.8524, 'train_samples_per_second': 16.292, 'train_steps_per_second': 2.059, 'train_loss': 0.00034672408364713194, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.21581456


fine tune epoch 99


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:27<00:00,  2.94it/s]


{'train_runtime': 27.1483, 'train_samples_per_second': 23.316, 'train_steps_per_second': 2.947, 'train_loss': 0.0003281525336205959, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.21611613


fine tune epoch 100


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:25<00:00,  3.16it/s]


{'train_runtime': 25.3382, 'train_samples_per_second': 24.982, 'train_steps_per_second': 3.157, 'train_loss': 0.0003106105141341686, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.21642232


fine tune epoch 101


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:16<00:00,  4.79it/s]


{'train_runtime': 16.6983, 'train_samples_per_second': 37.908, 'train_steps_per_second': 4.791, 'train_loss': 0.000294051505625248, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.21673287


fine tune epoch 102


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:16<00:00,  4.80it/s]


{'train_runtime': 16.6771, 'train_samples_per_second': 37.956, 'train_steps_per_second': 4.797, 'train_loss': 0.0002784006996080279, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.21704735


fine tune epoch 103


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [00:44<00:00,  1.80it/s]


{'train_runtime': 44.4627, 'train_samples_per_second': 14.237, 'train_steps_per_second': 1.799, 'train_loss': 0.0002636243123561144, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.21736571


fine tune epoch 104


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:28<00:00,  1.11s/it]


{'train_runtime': 88.8558, 'train_samples_per_second': 7.124, 'train_steps_per_second': 0.9, 'train_loss': 0.00024965496268123386, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.21768743


fine tune epoch 105


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:28<00:00,  1.11s/it]


{'train_runtime': 88.8688, 'train_samples_per_second': 7.123, 'train_steps_per_second': 0.9, 'train_loss': 0.00023646086920052766, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.21801238


fine tune epoch 106


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:28<00:00,  1.11s/it]


{'train_runtime': 88.856, 'train_samples_per_second': 7.124, 'train_steps_per_second': 0.9, 'train_loss': 0.00022398901637643576, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.21834026


fine tune epoch 107


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:29<00:00,  1.12s/it]


{'train_runtime': 89.3579, 'train_samples_per_second': 7.084, 'train_steps_per_second': 0.895, 'train_loss': 0.00021220676135271787, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.21867071


fine tune epoch 108


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:29<00:00,  1.12s/it]


{'train_runtime': 89.7483, 'train_samples_per_second': 7.053, 'train_steps_per_second': 0.891, 'train_loss': 0.00020106825977563857, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.21900365


fine tune epoch 109


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:30<00:00,  1.13s/it]


{'train_runtime': 90.2234, 'train_samples_per_second': 7.016, 'train_steps_per_second': 0.887, 'train_loss': 0.00019054730655625461, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.21933883


fine tune epoch 110


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:35<00:00,  1.19s/it]


{'train_runtime': 95.3644, 'train_samples_per_second': 6.638, 'train_steps_per_second': 0.839, 'train_loss': 0.00018059661379083992, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.21967599


fine tune epoch 111


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:32<00:00,  1.16s/it]


{'train_runtime': 92.8821, 'train_samples_per_second': 6.815, 'train_steps_per_second': 0.861, 'train_loss': 0.00017119304975494743, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.2200148


fine tune epoch 112


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:28<00:00,  1.11s/it]


{'train_runtime': 88.965, 'train_samples_per_second': 7.115, 'train_steps_per_second': 0.899, 'train_loss': 0.0001622983254492283, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.22035518


fine tune epoch 113


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:28<00:00,  1.11s/it]


{'train_runtime': 88.943, 'train_samples_per_second': 7.117, 'train_steps_per_second': 0.899, 'train_loss': 0.00015389416366815568, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.2206968


fine tune epoch 114


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:29<00:00,  1.11s/it]


{'train_runtime': 89.0212, 'train_samples_per_second': 7.111, 'train_steps_per_second': 0.899, 'train_loss': 0.00014594915555790066, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.22103964


fine tune epoch 115


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:29<00:00,  1.11s/it]


{'train_runtime': 89.0592, 'train_samples_per_second': 7.108, 'train_steps_per_second': 0.898, 'train_loss': 0.0001384377363137901, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.22138342


fine tune epoch 116


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:28<00:00,  1.11s/it]


{'train_runtime': 88.8552, 'train_samples_per_second': 7.124, 'train_steps_per_second': 0.9, 'train_loss': 0.0001313336892053485, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.22172818


fine tune epoch 117


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:28<00:00,  1.11s/it]


{'train_runtime': 88.9404, 'train_samples_per_second': 7.117, 'train_steps_per_second': 0.899, 'train_loss': 0.00012461363803595304, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.22207338


fine tune epoch 118


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:28<00:00,  1.11s/it]


{'train_runtime': 88.945, 'train_samples_per_second': 7.117, 'train_steps_per_second': 0.899, 'train_loss': 0.00011825660476461053, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.22241904


fine tune epoch 119


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:28<00:00,  1.11s/it]


{'train_runtime': 88.8712, 'train_samples_per_second': 7.123, 'train_steps_per_second': 0.9, 'train_loss': 0.0001122433808632195, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.22276491


fine tune epoch 120


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:31<00:00,  1.14s/it]


{'train_runtime': 91.5811, 'train_samples_per_second': 6.912, 'train_steps_per_second': 0.874, 'train_loss': 0.00010655603837221861, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.22311093


fine tune epoch 121


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:31<00:00,  1.15s/it]


{'train_runtime': 91.6866, 'train_samples_per_second': 6.904, 'train_steps_per_second': 0.873, 'train_loss': 0.00010117200436070562, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.22345686


fine tune epoch 122


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:32<00:00,  1.15s/it]


{'train_runtime': 92.3639, 'train_samples_per_second': 6.853, 'train_steps_per_second': 0.866, 'train_loss': 9.608226828277111e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.22380261


fine tune epoch 123


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:32<00:00,  1.16s/it]


{'train_runtime': 92.9883, 'train_samples_per_second': 6.807, 'train_steps_per_second': 0.86, 'train_loss': 9.126350050792098e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.22414806


fine tune epoch 124


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:34<00:00,  1.18s/it]


{'train_runtime': 94.1622, 'train_samples_per_second': 6.722, 'train_steps_per_second': 0.85, 'train_loss': 8.670140523463488e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.2244932


fine tune epoch 125


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:39<00:00,  1.24s/it]


{'train_runtime': 99.3956, 'train_samples_per_second': 6.368, 'train_steps_per_second': 0.805, 'train_loss': 8.238352602347731e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.22483772


fine tune epoch 126


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:30<00:00,  1.13s/it]


{'train_runtime': 90.0328, 'train_samples_per_second': 7.031, 'train_steps_per_second': 0.889, 'train_loss': 7.829666137695312e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.2251817


fine tune epoch 127


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:29<00:00,  1.11s/it]


{'train_runtime': 89.0473, 'train_samples_per_second': 7.109, 'train_steps_per_second': 0.898, 'train_loss': 7.442856440320611e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.22552486


fine tune epoch 128


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:29<00:00,  1.12s/it]


{'train_runtime': 89.3131, 'train_samples_per_second': 7.087, 'train_steps_per_second': 0.896, 'train_loss': 7.076431647874415e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.22586712


fine tune epoch 129


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:29<00:00,  1.12s/it]


{'train_runtime': 89.336, 'train_samples_per_second': 7.086, 'train_steps_per_second': 0.895, 'train_loss': 6.729011074639857e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.22620812


fine tune epoch 130


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:29<00:00,  1.12s/it]


{'train_runtime': 89.3162, 'train_samples_per_second': 7.087, 'train_steps_per_second': 0.896, 'train_loss': 6.399707053788007e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.22654799


fine tune epoch 131


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:29<00:00,  1.12s/it]


{'train_runtime': 89.3945, 'train_samples_per_second': 7.081, 'train_steps_per_second': 0.895, 'train_loss': 6.088520749472082e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.2268871


fine tune epoch 132


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:29<00:00,  1.12s/it]


{'train_runtime': 89.42, 'train_samples_per_second': 7.079, 'train_steps_per_second': 0.895, 'train_loss': 5.793399759568274e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.22722487


fine tune epoch 133


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:29<00:00,  1.12s/it]


{'train_runtime': 89.3976, 'train_samples_per_second': 7.081, 'train_steps_per_second': 0.895, 'train_loss': 5.513883661478758e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.22756122


fine tune epoch 134


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:29<00:00,  1.11s/it]


{'train_runtime': 89.056, 'train_samples_per_second': 7.108, 'train_steps_per_second': 0.898, 'train_loss': 5.248835077509284e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.22789618


fine tune epoch 135


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:29<00:00,  1.11s/it]


{'train_runtime': 89.028, 'train_samples_per_second': 7.11, 'train_steps_per_second': 0.899, 'train_loss': 4.997007781639695e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.22822948


fine tune epoch 136


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:29<00:00,  1.11s/it]


{'train_runtime': 89.0445, 'train_samples_per_second': 7.109, 'train_steps_per_second': 0.898, 'train_loss': 4.7589579480700195e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.22856127


fine tune epoch 137


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:29<00:00,  1.11s/it]


{'train_runtime': 89.0573, 'train_samples_per_second': 7.108, 'train_steps_per_second': 0.898, 'train_loss': 4.533179453574121e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.228891


fine tune epoch 138


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:29<00:00,  1.11s/it]


{'train_runtime': 89.0897, 'train_samples_per_second': 7.105, 'train_steps_per_second': 0.898, 'train_loss': 4.318630963098258e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.22921924


fine tune epoch 139


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:29<00:00,  1.11s/it]


{'train_runtime': 89.1308, 'train_samples_per_second': 7.102, 'train_steps_per_second': 0.898, 'train_loss': 4.1151230107061565e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.22954561


fine tune epoch 140


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:30<00:00,  1.13s/it]


{'train_runtime': 90.275, 'train_samples_per_second': 7.012, 'train_steps_per_second': 0.886, 'train_loss': 3.922380856238306e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.22987036


fine tune epoch 141


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:29<00:00,  1.11s/it]


{'train_runtime': 89.0456, 'train_samples_per_second': 7.109, 'train_steps_per_second': 0.898, 'train_loss': 3.7388733471743765e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.23019266


fine tune epoch 142


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:34<00:00,  1.18s/it]


{'train_runtime': 94.7305, 'train_samples_per_second': 6.682, 'train_steps_per_second': 0.845, 'train_loss': 3.565982333384454e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.2305135


fine tune epoch 143


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:36<00:00,  1.21s/it]


{'train_runtime': 96.5976, 'train_samples_per_second': 6.553, 'train_steps_per_second': 0.828, 'train_loss': 3.400744462851435e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.23083219


fine tune epoch 144


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:33<00:00,  1.17s/it]


{'train_runtime': 93.3519, 'train_samples_per_second': 6.781, 'train_steps_per_second': 0.857, 'train_loss': 3.244240651838482e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.23114854


fine tune epoch 145


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:29<00:00,  1.12s/it]


{'train_runtime': 89.4236, 'train_samples_per_second': 7.079, 'train_steps_per_second': 0.895, 'train_loss': 3.095801221206784e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.2314627


fine tune epoch 146


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:29<00:00,  1.12s/it]


{'train_runtime': 89.2705, 'train_samples_per_second': 7.091, 'train_steps_per_second': 0.896, 'train_loss': 2.9545495635829866e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.23177512


fine tune epoch 147


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:30<00:00,  1.13s/it]


{'train_runtime': 90.0226, 'train_samples_per_second': 7.032, 'train_steps_per_second': 0.889, 'train_loss': 2.820392546709627e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.23208496


fine tune epoch 148


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:32<00:00,  1.16s/it]


{'train_runtime': 92.7407, 'train_samples_per_second': 6.825, 'train_steps_per_second': 0.863, 'train_loss': 2.692829875741154e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.23239224


fine tune epoch 149


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:34<00:00,  1.19s/it]


{'train_runtime': 94.9637, 'train_samples_per_second': 6.666, 'train_steps_per_second': 0.842, 'train_loss': 2.571654040366411e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.23269789


fine tune epoch 150


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:33<00:00,  1.16s/it]


{'train_runtime': 93.0469, 'train_samples_per_second': 6.803, 'train_steps_per_second': 0.86, 'train_loss': 2.456867368891835e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.23300098


fine tune epoch 151


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:36<00:00,  1.21s/it]


{'train_runtime': 96.4791, 'train_samples_per_second': 6.561, 'train_steps_per_second': 0.829, 'train_loss': 2.347555273445323e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.23330228


fine tune epoch 152


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:38<00:00,  1.23s/it]


{'train_runtime': 98.0407, 'train_samples_per_second': 6.457, 'train_steps_per_second': 0.816, 'train_loss': 2.243905619252473e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.23360099


fine tune epoch 153


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:44<00:00,  1.30s/it]


{'train_runtime': 104.2178, 'train_samples_per_second': 6.074, 'train_steps_per_second': 0.768, 'train_loss': 2.1444082085508855e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.23389632


fine tune epoch 154


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:37<00:00,  1.22s/it]


{'train_runtime': 97.3662, 'train_samples_per_second': 6.501, 'train_steps_per_second': 0.822, 'train_loss': 2.0504988788161428e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.23418947


fine tune epoch 155


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:29<00:00,  1.12s/it]


{'train_runtime': 89.3086, 'train_samples_per_second': 7.088, 'train_steps_per_second': 0.896, 'train_loss': 1.9614874327089636e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.23448041


fine tune epoch 156


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:29<00:00,  1.12s/it]


{'train_runtime': 89.4804, 'train_samples_per_second': 7.074, 'train_steps_per_second': 0.894, 'train_loss': 1.8761084356810896e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.23476905


fine tune epoch 157


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:30<00:00,  1.13s/it]


{'train_runtime': 90.0816, 'train_samples_per_second': 7.027, 'train_steps_per_second': 0.888, 'train_loss': 1.7950496112462132e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.23505415


fine tune epoch 158


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:31<00:00,  1.14s/it]


{'train_runtime': 91.0726, 'train_samples_per_second': 6.951, 'train_steps_per_second': 0.878, 'train_loss': 1.718199928291142e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.23533739


fine tune epoch 159


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:34<00:00,  1.18s/it]


{'train_runtime': 94.0087, 'train_samples_per_second': 6.733, 'train_steps_per_second': 0.851, 'train_loss': 1.6448147653136404e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.2356183


fine tune epoch 160


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:35<00:00,  1.20s/it]


{'train_runtime': 95.9572, 'train_samples_per_second': 6.597, 'train_steps_per_second': 0.834, 'train_loss': 1.575005881022662e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.23589589


fine tune epoch 161


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:33<00:00,  1.17s/it]


{'train_runtime': 93.7952, 'train_samples_per_second': 6.749, 'train_steps_per_second': 0.853, 'train_loss': 1.508437271695584e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.23617147


fine tune epoch 162


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:32<00:00,  1.15s/it]


{'train_runtime': 92.2277, 'train_samples_per_second': 6.863, 'train_steps_per_second': 0.867, 'train_loss': 1.4452960749622435e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.23644376


fine tune epoch 163


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:31<00:00,  1.15s/it]


{'train_runtime': 91.6625, 'train_samples_per_second': 6.906, 'train_steps_per_second': 0.873, 'train_loss': 1.38465067720972e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.2367134


fine tune epoch 164


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:35<00:00,  1.20s/it]


{'train_runtime': 95.977, 'train_samples_per_second': 6.595, 'train_steps_per_second': 0.834, 'train_loss': 1.327040372416377e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.23697925


fine tune epoch 165


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:36<00:00,  1.20s/it]


{'train_runtime': 96.2146, 'train_samples_per_second': 6.579, 'train_steps_per_second': 0.831, 'train_loss': 1.2727272405754775e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.23724316


fine tune epoch 166


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:33<00:00,  1.17s/it]


{'train_runtime': 93.4371, 'train_samples_per_second': 6.775, 'train_steps_per_second': 0.856, 'train_loss': 1.2200718629173934e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.23750359


fine tune epoch 167


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:31<00:00,  1.15s/it]


{'train_runtime': 91.8859, 'train_samples_per_second': 6.889, 'train_steps_per_second': 0.871, 'train_loss': 1.170694304164499e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.23776312


fine tune epoch 168


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:34<00:00,  1.18s/it]


{'train_runtime': 94.2585, 'train_samples_per_second': 6.716, 'train_steps_per_second': 0.849, 'train_loss': 1.1230306699872018e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.23801859


fine tune epoch 169


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:34<00:00,  1.18s/it]


{'train_runtime': 94.1677, 'train_samples_per_second': 6.722, 'train_steps_per_second': 0.85, 'train_loss': 1.0773973190225661e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.2382711


fine tune epoch 170


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:33<00:00,  1.17s/it]


{'train_runtime': 93.916, 'train_samples_per_second': 6.74, 'train_steps_per_second': 0.852, 'train_loss': 1.0342412133468315e-05, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.2385222


fine tune epoch 171


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:34<00:00,  1.18s/it]


{'train_runtime': 94.5446, 'train_samples_per_second': 6.695, 'train_steps_per_second': 0.846, 'train_loss': 9.934689296642319e-06, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.23876847


fine tune epoch 172


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:34<00:00,  1.18s/it]


{'train_runtime': 94.1299, 'train_samples_per_second': 6.725, 'train_steps_per_second': 0.85, 'train_loss': 9.540934843244032e-06, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.23901318


fine tune epoch 173


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:35<00:00,  1.20s/it]


{'train_runtime': 95.8033, 'train_samples_per_second': 6.607, 'train_steps_per_second': 0.835, 'train_loss': 9.165806113742292e-06, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.23925394


fine tune epoch 174


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:34<00:00,  1.18s/it]


{'train_runtime': 94.2772, 'train_samples_per_second': 6.714, 'train_steps_per_second': 0.849, 'train_loss': 8.804831304587424e-06, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.23949312


fine tune epoch 175


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:29<00:00,  1.12s/it]


{'train_runtime': 89.4381, 'train_samples_per_second': 7.078, 'train_steps_per_second': 0.894, 'train_loss': 8.463599806418643e-06, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.23972858


fine tune epoch 176


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:34<00:00,  1.18s/it]


{'train_runtime': 94.2095, 'train_samples_per_second': 6.719, 'train_steps_per_second': 0.849, 'train_loss': 8.13913211459294e-06, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.23996273


fine tune epoch 177


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:34<00:00,  1.18s/it]


{'train_runtime': 94.217, 'train_samples_per_second': 6.719, 'train_steps_per_second': 0.849, 'train_loss': 7.8254692198243e-06, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.24019228


fine tune epoch 178


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:34<00:00,  1.18s/it]


{'train_runtime': 94.2412, 'train_samples_per_second': 6.717, 'train_steps_per_second': 0.849, 'train_loss': 7.525401451857761e-06, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.24041851


fine tune epoch 179


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:33<00:00,  1.17s/it]


{'train_runtime': 93.8815, 'train_samples_per_second': 6.743, 'train_steps_per_second': 0.852, 'train_loss': 7.239862316055223e-06, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.24064381


fine tune epoch 180


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:36<00:00,  1.21s/it]


{'train_runtime': 96.8096, 'train_samples_per_second': 6.539, 'train_steps_per_second': 0.826, 'train_loss': 6.972577830310911e-06, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.24086678


fine tune epoch 181


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:38<00:00,  1.23s/it]


{'train_runtime': 98.2186, 'train_samples_per_second': 6.445, 'train_steps_per_second': 0.815, 'train_loss': 6.711624882882461e-06, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.2410881


fine tune epoch 182


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:41<00:00,  1.26s/it]


{'train_runtime': 101.0468, 'train_samples_per_second': 6.264, 'train_steps_per_second': 0.792, 'train_loss': 6.457563722506166e-06, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.24130364


fine tune epoch 183


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:38<00:00,  1.23s/it]


{'train_runtime': 98.6222, 'train_samples_per_second': 6.418, 'train_steps_per_second': 0.811, 'train_loss': 6.22157022007741e-06, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.24151741


fine tune epoch 184


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:38<00:00,  1.23s/it]


{'train_runtime': 98.1881, 'train_samples_per_second': 6.447, 'train_steps_per_second': 0.815, 'train_loss': 5.993957893224433e-06, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.24172926


fine tune epoch 185


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:43<00:00,  1.30s/it]


{'train_runtime': 103.7742, 'train_samples_per_second': 6.1, 'train_steps_per_second': 0.771, 'train_loss': 5.775659519713372e-06, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.24193859


fine tune epoch 186


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:32<00:00,  1.15s/it]


{'train_runtime': 92.3188, 'train_samples_per_second': 6.857, 'train_steps_per_second': 0.867, 'train_loss': 5.564437742577866e-06, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.24214427


fine tune epoch 187


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:28<00:00,  1.11s/it]


{'train_runtime': 88.9243, 'train_samples_per_second': 7.118, 'train_steps_per_second': 0.9, 'train_loss': 5.366067853174172e-06, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.24234651


fine tune epoch 188


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:28<00:00,  1.11s/it]


{'train_runtime': 88.9395, 'train_samples_per_second': 7.117, 'train_steps_per_second': 0.899, 'train_loss': 5.170491931494325e-06, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.24254467


fine tune epoch 189


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:28<00:00,  1.11s/it]


{'train_runtime': 88.987, 'train_samples_per_second': 7.113, 'train_steps_per_second': 0.899, 'train_loss': 4.990748857380823e-06, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.24274303


fine tune epoch 190


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:28<00:00,  1.11s/it]


{'train_runtime': 88.9449, 'train_samples_per_second': 7.117, 'train_steps_per_second': 0.899, 'train_loss': 4.811563121620565e-06, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.24293451


fine tune epoch 191


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:28<00:00,  1.11s/it]


{'train_runtime': 88.9796, 'train_samples_per_second': 7.114, 'train_steps_per_second': 0.899, 'train_loss': 4.6439261495834215e-06, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.24312511


fine tune epoch 192


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
100%|██████████| 80/80 [01:28<00:00,  1.11s/it]


{'train_runtime': 88.9849, 'train_samples_per_second': 7.114, 'train_steps_per_second': 0.899, 'train_loss': 4.483555312617682e-06, 'epoch': 1.0}

Average Similarity Score (excluding diagonal):
0.243313


fine tune epoch 193


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
  0%|          | 0/80 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [1]:
import pandas as pd
import numpy as np
from torch.utils.data import DataLoader
from sentence_transformers import SentenceTransformer, InputExample, losses, util
import math
from sklearn.model_selection import train_test_split
import torch

# Load the dataset
# Modify the path to point to your csv file
csv_path = 'fine_tuning_dataset/all_merged_dataset.csv'
df = pd.read_csv(csv_path)

# Split the dataset into train and test sets
train_df, test_df = train_test_split(df, test_size=0.1, random_state=42)

# Create InputExamples for training
train_examples = [InputExample(texts=[row['question'], row['context']]) for _, row in train_df.iterrows()]

# Create InputExamples for testing

# Create a DataLoader
train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=16)

# Load the pre-trained model
model = SentenceTransformer('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2')

# Use MultipleNegativesRankingLoss for training
train_loss = losses.MultipleNegativesRankingLoss(model)

# Fine-tune the model
num_epochs = 300
warmup_steps = math.ceil(len(train_dataloader) * num_epochs * 0.1)
questions = test_df['question'].values
contexts = test_df['context'].values

assert len(questions) == len(contexts), "The number of questions must match the number of contexts"

output_path = 'output_path_to_save_model'
for i in range(num_epochs):
    print(f"fine tune epoch {i+1}")
    model.fit(
        train_objectives=[(train_dataloader, train_loss)],
        epochs=1,
        warmup_steps=warmup_steps,
        output_path=output_path,
        scheduler=scheduler
    )
    model_test = SentenceTransformer(output_path, device='cpu')

    with torch.no_grad():
        question_embeddings = model.encode(questions, convert_to_tensor=True)
        context_embeddings = model.encode(contexts, convert_to_tensor=True)

        # Perform element-wise multiplication for each question-context pair
        combined_embeddings = torch.mul(question_embeddings, context_embeddings)

        # Calculate cosine similarity matrix for the combined embeddings
        similarity_matrix = util.cos_sim(combined_embeddings, combined_embeddings).cpu().numpy()

    # Extract the upper triangular part of the similarity matrix, excluding the diagonal
    triu_indices = np.triu_indices_from(similarity_matrix, k=1)
    triu_values = similarity_matrix[triu_indices]
    average_similarity_score = np.mean(triu_values)

    print("\nAverage Similarity Score (excluding diagonal):")
    print(average_similarity_score)
    print("\n" + "="*50 + "\n")

    torch.cuda.empty_cache()
    torch.cuda.ipc_collect()
    del model_test

print("Model fine-tuning complete. Model saved to `output_path_to_save_model`.")

  from tqdm.autonotebook import tqdm, trange


fine tune epoch 1


AttributeError: 'OneCycleLR' object has no attribute 'lower'

In [1]:
import pandas as pd
from sentence_transformers import SentenceTransformer, InputExample, losses
from torch.utils.data import DataLoader
from sentence_transformers import LoggingHandler
import logging
import os
import torch
from models import ModelName, models
from sentence_transformers.evaluation import EmbeddingSimilarityEvaluator

# Setup logging
logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.INFO, handlers=[LoggingHandler()])

# File paths and model info
csv_path = 'fine_tuning_dataset/all_merged_dataset.csv'
model_name = ModelName.MULTILINGUAL_MINILM_FINETUNING_EARLY_STOP.value
output_path = models[model_name]['local_dir']

# Load the dataset
df = pd.read_csv(csv_path)

# Assume the CSV has columns: question, context
examples = [InputExample(texts=[row['question'], row['context']]) for idx, row in df.iterrows()]

# Check if GPU is available and set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
logging.info(f"Using device: {device}")

# Load the pre-trained model
model = SentenceTransformer('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2')
model.to(device)  # Move the model to the GPU if available

# Convert the dataset to a DataLoader
train_dataloader = DataLoader(examples, shuffle=True, batch_size=16)

# Define the loss function
train_loss = losses.MultipleNegativesRankingLoss(model)

# Evaluation during training using EmbeddingSimilarityEvaluator
sentences1 = df['question'].tolist()
sentences2 = df['context'].tolist()
evaluator = EmbeddingSimilarityEvaluator(sentences1, sentences2, [1.0]*len(sentences1))  # Assuming 1.0 similarity score for all pairs

# Custom EarlyStopping class
class EarlyStopping:
    def __init__(self, patience=5, verbose=False, delta=0):
        self.patience = patience
        self.verbose = verbose
        self.delta = delta
        self.best_score = None
        self.early_stop = False
        self.counter = 0

    def __call__(self, score, model, model_path):
        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(score, model, model_path)
        elif score < self.best_score + self.delta:
            self.counter += 1
            if self.verbose:
                logging.info(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(score, model, model_path)
            self.counter = 0

    def save_checkpoint(self, score, model, model_path):
        '''Saves model when evaluation score improves.'''
        if self.verbose:
            logging.info(f'Evaluation score improved to {score}; saving model to {model_path}')
        model.save(model_path)


# Early stopping callback
early_stopping = EarlyStopping(patience=5, verbose=True)

# Make sure output path exists
os.makedirs(output_path, exist_ok=True)

# Train the model with early stopping
epoch = 0
while epoch < 50:
    model.fit(train_objectives=[(train_dataloader, train_loss)],
              epochs=1,
              evaluator=evaluator,
              evaluation_steps=1000,
              output_path=output_path,
              show_progress_bar=True)
    
    # Evaluate the model
    score = evaluator(model, output_path)
    print(score)

    early_stopping(score, model, output_path)
    if early_stopping.early_stop:
        logging.info("Early stopping")
        break

    epoch += 1

logging.info("Training finished")

# Save the final model
model.save(os.path.join(output_path, 'fine_tuned_model'))

  from tqdm.autonotebook import tqdm, trange


2024-06-21 22:06:01,126 - Using device: cuda
2024-06-21 22:06:01,126 - Use pytorch device_name: cuda
2024-06-21 22:06:01,126 - Load pretrained SentenceTransformer: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
  0%|          | 0/44 [00:00<?, ?it/s]

OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 

In [None]:
# # Example evaluation - you may need to adapt according to your dataset
# evaluation_examples = [
#     ("How are you?", "How do you do?"),
#     ("What is your name?", "What's your name?"),
#     ("Where do you live?", "Where is your home located?")
# ]

# model = SentenceTransformer(output_path)

# for pair in evaluation_examples:
#     embeddings = model.encode(pair)
#     similarity = util.pytorch_cos_sim(embeddings[0], embeddings[1])
#     print(f"Similarity between: '{pair[0]}' and '{pair[1]}' is {similarity.item():.4f}")