In [None]:
!pip install datasets evaluate

Collecting datasets
  Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.2.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m25.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading evaluate-0.4.3-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m6

In [None]:
import random
import numpy as np
import torch
import pandas as pd
import itertools
from sklearn.model_selection import train_test_split

from transformers import AutoTokenizer, AutoModelForSequenceClassification
import evaluate
from torch.utils.data import Dataset, DataLoader

In [None]:


# ------------------------------------------------------------------------
# 1) Read & Sample the Dataset (5% in a balanced way)
# ------------------------------------------------------------------------
# Load the full training split directly from the Hugging Face Hub (needs network access).
df = pd.read_parquet("hf://datasets/TimKoornstra/financial-tweets-sentiment/data/train-00000-of-00001.parquet")

label_col = "sentiment"  # or your actual label column
text_col  = "tweet"      # or your actual text column

# Stratified sampling: keep 5% of the rows of every class, so the subset keeps the
# original class proportions (the classes are NOT equalised in size).
# Every class is sampled with the same fixed seed and the pieces are concatenated in
# sorted label order, which selects the same rows as
# `groupby(...).apply(lambda x: x.sample(...))` but without calling
# `DataFrameGroupBy.apply` on the grouping column (deprecated in recent pandas).
df_small = pd.concat(
    [class_rows.sample(frac=0.05, random_state=42) for _, class_rows in df.groupby(label_col)]
).reset_index(drop=True)

print("Original dataset size:", len(df))
print("Reduced (5%) dataset size:", len(df_small))
print("Sampled class distribution:")
print(df_small[label_col].value_counts())

# ------------------------------------------------------------------------
# 2) Define Hyperparameter Grid
# ------------------------------------------------------------------------
# Pretrained checkpoints to compare (Hugging Face Hub identifiers).
models = [
    "ProsusAI/finbert",                                  # FinBERT
    "prajjwal1/bert-tiny",                               # BERT Tiny
    "vinai/bertweet-base",                               # BERTweet
    "cardiffnlp/twitter-roberta-base-sentiment-latest",  # Twitter RoBERTa
]
# Optimizer keys; train_and_evaluate maps them to torch.optim classes.
optimizers = ["adam", "sgd"]
learning_rates = [1e-5, 3e-5, 5e-5]
epochs_list = [1, 2, 3]
seeds = [42, 1984, 1337]

# Full Cartesian product of the grid; the rightmost factor (seed) varies fastest.
param_combinations = [
    (model_id, optimizer_id, lr_value, epoch_count, seed_id)
    for model_id in models
    for optimizer_id in optimizers
    for lr_value in learning_rates
    for epoch_count in epochs_list
    for seed_id in seeds
]

# ------------------------------------------------------------------------
# 3) Create a Custom Dataset & Data Collation
# ------------------------------------------------------------------------
class TextClassificationDataset(Dataset):
    """Map-style dataset that tokenizes one text per item and attaches its label id.

    Args:
        df: DataFrame holding the texts and labels.
        tokenizer: Callable invoked as
            ``tokenizer(text, truncation=True, padding="max_length",
            max_length=max_len, return_tensors="pt")``; it must return a mapping
            of tensors whose first dimension has size 1.
        text_col: Name of the column containing the raw text.
        label_col: Name of the column containing the raw labels.
        max_len: Length every sequence is truncated / padded to.
        label2id: Optional ``{raw_label: class_id}`` mapping to use instead of
            deriving one from ``df``. Pass the training set's mapping when
            building a validation/test dataset so that both splits share the
            same ids even if a class is missing from one of them.

    Attributes set on the instance: ``texts``, ``labels_raw``, ``label2id``,
    ``labels`` (the integer ids, aligned with ``texts``).

    Raises:
        ValueError: If ``label2id`` is given but does not cover every label in ``df``.
    """

    def __init__(self, df, tokenizer, text_col, label_col, max_len=128, label2id=None):
        self.tokenizer = tokenizer
        self.texts = df[text_col].tolist()
        self.labels_raw = df[label_col].tolist()
        self.max_len = max_len

        if label2id is None:
            # Default: ids follow the sorted order of the labels present in *this* frame.
            # Integer labels 0..k-1 therefore map to themselves when all are present.
            unique_labels = sorted(set(self.labels_raw))
            label2id = {lab: i for i, lab in enumerate(unique_labels)}
        else:
            # Copy so later changes to the caller's dict cannot alter this dataset.
            label2id = dict(label2id)
            unknown = [lab for lab in set(self.labels_raw) if lab not in label2id]
            if unknown:
                raise ValueError(f"Labels missing from label2id: {unknown}")

        self.label2id = label2id
        self.labels = [self.label2id[lab] for lab in self.labels_raw]

    def __len__(self):
        """Return the number of examples."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Return the tokenizer outputs for example ``idx`` plus a ``"labels"`` tensor."""
        text = self.texts[idx]
        label = self.labels[idx]

        encoding = self.tokenizer(
            text,
            truncation=True,
            padding="max_length",
            max_length=self.max_len,
            return_tensors="pt"  # returns dict of tensors
        )
        # The tokenizer returns tensors with a leading batch dimension of 1;
        # drop it so the DataLoader can add the real batch dimension itself.
        item = {key: val.squeeze(0) for key, val in encoding.items()}
        item["labels"] = torch.tensor(label, dtype=torch.long)

        return item

def create_dataloader(df_part, tokenizer, text_col, label_col, batch_size=16, shuffle=True):
    """Wrap a DataFrame slice in a TextClassificationDataset and a DataLoader.

    Returns:
        A ``(loader, label2id)`` tuple: the DataLoader over the tokenized rows of
        ``df_part`` and the label-to-id mapping built by the dataset.
    """
    dataset = TextClassificationDataset(df_part, tokenizer, text_col, label_col)
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle), dataset.label2id

# ------------------------------------------------------------------------
# 4) Training & Evaluation (Manual Loop)
# ------------------------------------------------------------------------
def _train_one_epoch(model, train_loader, optimizer, device):
    """Run one optimisation pass over ``train_loader`` and return the mean batch loss."""
    model.train()
    total_loss = 0.0

    for batch in train_loader:
        # Move data to GPU if available
        input_ids      = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels         = batch["labels"].to(device)

        optimizer.zero_grad()

        # Passing `labels` makes the model compute the loss itself.
        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)

        loss = outputs.loss
        total_loss += loss.item()

        loss.backward()
        optimizer.step()

    return total_loss / len(train_loader)


def _validation_accuracy(model, val_loader, device):
    """Return the fraction of examples in ``val_loader`` whose argmax prediction is correct."""
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for batch in val_loader:
            input_ids      = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels         = batch["labels"].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            preds = torch.argmax(outputs.logits, dim=-1)

            all_preds.extend(preds.cpu().tolist())
            all_labels.extend(labels.cpu().tolist())

    return np.mean(np.array(all_preds) == np.array(all_labels))


def train_and_evaluate(model_name, optimizer_name, learning_rate, num_epochs, seed_value, df_data):
    """Fine-tune one pretrained classifier and return its final validation accuracy.

    The data is split 80/20 (stratified on the module-level ``label_col``), the
    model is trained for ``num_epochs`` epochs, and validation accuracy is printed
    after every epoch. Texts are read from the module-level ``text_col``.

    Args:
        model_name: Checkpoint identifier passed to ``AutoTokenizer`` and
            ``AutoModelForSequenceClassification``.
        optimizer_name: ``"adam"`` or ``"sgd"`` (case-insensitive).
        learning_rate: Learning rate handed to the optimizer.
        num_epochs: Number of training epochs; must be at least 1.
        seed_value: Seed for ``random``, NumPy, PyTorch and the train/val split.
        df_data: DataFrame containing the ``text_col`` and ``label_col`` columns.

    Returns:
        Validation accuracy measured after the last epoch.

    Raises:
        ValueError: If ``num_epochs`` is below 1, if ``optimizer_name`` is not
            recognised, or if the training and validation splits assign
            different ids to the same label.
    """
    # Fail fast on bad arguments, before any download, split or training happens.
    # (With zero epochs there would be no accuracy to return.)
    if num_epochs < 1:
        raise ValueError(f"num_epochs must be at least 1, got {num_epochs}")
    optimizer_classes = {"adam": torch.optim.Adam, "sgd": torch.optim.SGD}
    optimizer_cls = optimizer_classes.get(optimizer_name.lower())
    if optimizer_cls is None:
        raise ValueError(f"Unknown optimizer: {optimizer_name}")

    # Set seeds
    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    torch.cuda.manual_seed_all(seed_value)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # 4a) Split data into train/val
    train_df, val_df = train_test_split(
        df_data,
        test_size=0.2,
        random_state=seed_value,
        stratify=df_data[label_col]
    )

    # 4b) Load tokenizer & model
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        num_labels=len(df_data[label_col].unique())
    )
    model.to(device)

    # 4c) Create data loaders
    train_loader, label2id = create_dataloader(train_df, tokenizer, text_col, label_col, batch_size=16, shuffle=True)
    val_loader, val_label2id = create_dataloader(val_df, tokenizer, text_col, label_col, batch_size=16, shuffle=False)

    # Each loader derives its label ids from its own rows. If a class were missing
    # from one split the ids could disagree and the accuracy would be meaningless,
    # so refuse to continue instead of silently reporting a wrong number.
    misaligned = {lab: idx for lab, idx in val_label2id.items() if label2id.get(lab) != idx}
    if misaligned:
        raise ValueError(
            f"Validation label ids {misaligned} disagree with training label ids {label2id}"
        )

    # 4d) Create optimizer
    optimizer = optimizer_cls(model.parameters(), lr=learning_rate)

    # 4e) Train, evaluating on the validation set after every epoch
    for epoch in range(num_epochs):
        avg_train_loss = _train_one_epoch(model, train_loader, optimizer, device)
        accuracy = _validation_accuracy(model, val_loader, device)
        print(f"[{model_name}] Epoch {epoch+1}/{num_epochs} | "
              f"Train Loss: {avg_train_loss:.4f} | Val Accuracy: {accuracy:.4f}")

    # Final validation accuracy after all epochs
    return accuracy

# ------------------------------------------------------------------------
# 5) Run the Grid Search & Collect Results
# ------------------------------------------------------------------------
# One record per grid point: the hyperparameters plus the final validation accuracy.
results = []
for model_name, opt_name, lr, ep, seed_val in param_combinations:
    run_config = {
        "model": model_name,
        "optimizer": opt_name,
        "learning_rate": lr,
        "epochs": ep,
        "seed": seed_val,
    }
    print(f"=== Now training: model={model_name}, optimizer={opt_name}, lr={lr}, epochs={ep}, seed={seed_val} ===")

    # Every run starts from the pretrained checkpoint and trains from scratch.
    accuracy = train_and_evaluate(model_name, opt_name, lr, ep, seed_val, df_small)

    results.append({**run_config, "accuracy": accuracy})

# ------------------------------------------------------------------------
# 6) Identify the Best Combination
# ------------------------------------------------------------------------
# Best single run. NOTE: this is the maximum over individual (configuration, seed)
# runs, so it can reward a lucky seed rather than a genuinely better configuration.
best_result = max(results, key=lambda x: x["accuracy"])

print("\nAll results:")
for r in results:
    print(r)

print("\nBest hyperparameters based on accuracy:")
print(best_result)

# The seed is not a hyperparameter, so also rank configurations by their accuracy
# averaged over all seeds; `std` and `count` show how stable each configuration is.
config_cols = ["model", "optimizer", "learning_rate", "epochs"]
seed_summary = (
    pd.DataFrame(results)
    .groupby(config_cols, sort=False)["accuracy"]
    .agg(["mean", "std", "count"])
    .reset_index()
)
best_config = seed_summary.loc[seed_summary["mean"].idxmax()].to_dict()

print("\nBest configuration by mean accuracy across seeds:")
print(best_config)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Original dataset size: 38091
Reduced (5%) dataset size: 1904
Sampled class distribution:
sentiment
1    868
0    609
2    427
Name: count, dtype: int64
=== Now training: model=ProsusAI/finbert, optimizer=adam, lr=1e-05, epochs=1, seed=42 ===


  df_small = df.groupby(label_col, group_keys=False).apply(


tokenizer_config.json:   0%|          | 0.00/252 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/758 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

[ProsusAI/finbert] Epoch 1/1 | Train Loss: 1.1382 | Val Accuracy: 0.5249
=== Now training: model=ProsusAI/finbert, optimizer=adam, lr=1e-05, epochs=1, seed=1984 ===
[ProsusAI/finbert] Epoch 1/1 | Train Loss: 1.1855 | Val Accuracy: 0.5328
=== Now training: model=ProsusAI/finbert, optimizer=adam, lr=1e-05, epochs=1, seed=1337 ===
[ProsusAI/finbert] Epoch 1/1 | Train Loss: 1.1986 | Val Accuracy: 0.4777
=== Now training: model=ProsusAI/finbert, optimizer=adam, lr=1e-05, epochs=2, seed=42 ===
[ProsusAI/finbert] Epoch 1/2 | Train Loss: 1.1382 | Val Accuracy: 0.5249
[ProsusAI/finbert] Epoch 2/2 | Train Loss: 0.8827 | Val Accuracy: 0.5538
=== Now training: model=ProsusAI/finbert, optimizer=adam, lr=1e-05, epochs=2, seed=1984 ===
[ProsusAI/finbert] Epoch 1/2 | Train Loss: 1.1855 | Val Accuracy: 0.5328
[ProsusAI/finbert] Epoch 2/2 | Train Loss: 0.9143 | Val Accuracy: 0.5591
=== Now training: model=ProsusAI/finbert, optimizer=adam, lr=1e-05, epochs=2, seed=1337 ===
[ProsusAI/finbert] Epoch 1/2 | 

config.json:   0%|          | 0.00/285 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/17.8M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/1 | Train Loss: 1.0645 | Val Accuracy: 0.4541
=== Now training: model=prajjwal1/bert-tiny, optimizer=adam, lr=1e-05, epochs=1, seed=1984 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/1 | Train Loss: 1.1072 | Val Accuracy: 0.4016
=== Now training: model=prajjwal1/bert-tiny, optimizer=adam, lr=1e-05, epochs=1, seed=1337 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/1 | Train Loss: 1.0775 | Val Accuracy: 0.4567
=== Now training: model=prajjwal1/bert-tiny, optimizer=adam, lr=1e-05, epochs=2, seed=42 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/2 | Train Loss: 1.0645 | Val Accuracy: 0.4541
[prajjwal1/bert-tiny] Epoch 2/2 | Train Loss: 1.0553 | Val Accuracy: 0.4541
=== Now training: model=prajjwal1/bert-tiny, optimizer=adam, lr=1e-05, epochs=2, seed=1984 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/2 | Train Loss: 1.1072 | Val Accuracy: 0.4016
[prajjwal1/bert-tiny] Epoch 2/2 | Train Loss: 1.0651 | Val Accuracy: 0.4619
=== Now training: model=prajjwal1/bert-tiny, optimizer=adam, lr=1e-05, epochs=2, seed=1337 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/2 | Train Loss: 1.0775 | Val Accuracy: 0.4567
[prajjwal1/bert-tiny] Epoch 2/2 | Train Loss: 1.0607 | Val Accuracy: 0.4567
=== Now training: model=prajjwal1/bert-tiny, optimizer=adam, lr=1e-05, epochs=3, seed=42 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/3 | Train Loss: 1.0645 | Val Accuracy: 0.4541
[prajjwal1/bert-tiny] Epoch 2/3 | Train Loss: 1.0553 | Val Accuracy: 0.4541
[prajjwal1/bert-tiny] Epoch 3/3 | Train Loss: 1.0440 | Val Accuracy: 0.4567
=== Now training: model=prajjwal1/bert-tiny, optimizer=adam, lr=1e-05, epochs=3, seed=1984 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/3 | Train Loss: 1.1072 | Val Accuracy: 0.4016
[prajjwal1/bert-tiny] Epoch 2/3 | Train Loss: 1.0651 | Val Accuracy: 0.4619
[prajjwal1/bert-tiny] Epoch 3/3 | Train Loss: 1.0552 | Val Accuracy: 0.4619
=== Now training: model=prajjwal1/bert-tiny, optimizer=adam, lr=1e-05, epochs=3, seed=1337 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/3 | Train Loss: 1.0775 | Val Accuracy: 0.4567
[prajjwal1/bert-tiny] Epoch 2/3 | Train Loss: 1.0607 | Val Accuracy: 0.4567
[prajjwal1/bert-tiny] Epoch 3/3 | Train Loss: 1.0507 | Val Accuracy: 0.4567
=== Now training: model=prajjwal1/bert-tiny, optimizer=adam, lr=3e-05, epochs=1, seed=42 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/1 | Train Loss: 1.0542 | Val Accuracy: 0.4567
=== Now training: model=prajjwal1/bert-tiny, optimizer=adam, lr=3e-05, epochs=1, seed=1984 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/1 | Train Loss: 1.0768 | Val Accuracy: 0.4593
=== Now training: model=prajjwal1/bert-tiny, optimizer=adam, lr=3e-05, epochs=1, seed=1337 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/1 | Train Loss: 1.0667 | Val Accuracy: 0.4567
=== Now training: model=prajjwal1/bert-tiny, optimizer=adam, lr=3e-05, epochs=2, seed=42 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/2 | Train Loss: 1.0542 | Val Accuracy: 0.4567
[prajjwal1/bert-tiny] Epoch 2/2 | Train Loss: 1.0376 | Val Accuracy: 0.4672
=== Now training: model=prajjwal1/bert-tiny, optimizer=adam, lr=3e-05, epochs=2, seed=1984 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/2 | Train Loss: 1.0768 | Val Accuracy: 0.4593
[prajjwal1/bert-tiny] Epoch 2/2 | Train Loss: 1.0352 | Val Accuracy: 0.4934
=== Now training: model=prajjwal1/bert-tiny, optimizer=adam, lr=3e-05, epochs=2, seed=1337 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/2 | Train Loss: 1.0667 | Val Accuracy: 0.4567
[prajjwal1/bert-tiny] Epoch 2/2 | Train Loss: 1.0407 | Val Accuracy: 0.4567
=== Now training: model=prajjwal1/bert-tiny, optimizer=adam, lr=3e-05, epochs=3, seed=42 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/3 | Train Loss: 1.0542 | Val Accuracy: 0.4567
[prajjwal1/bert-tiny] Epoch 2/3 | Train Loss: 1.0376 | Val Accuracy: 0.4672
[prajjwal1/bert-tiny] Epoch 3/3 | Train Loss: 1.0002 | Val Accuracy: 0.5381
=== Now training: model=prajjwal1/bert-tiny, optimizer=adam, lr=3e-05, epochs=3, seed=1984 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/3 | Train Loss: 1.0768 | Val Accuracy: 0.4593
[prajjwal1/bert-tiny] Epoch 2/3 | Train Loss: 1.0352 | Val Accuracy: 0.4934
[prajjwal1/bert-tiny] Epoch 3/3 | Train Loss: 1.0105 | Val Accuracy: 0.5512
=== Now training: model=prajjwal1/bert-tiny, optimizer=adam, lr=3e-05, epochs=3, seed=1337 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/3 | Train Loss: 1.0667 | Val Accuracy: 0.4567
[prajjwal1/bert-tiny] Epoch 2/3 | Train Loss: 1.0407 | Val Accuracy: 0.4567
[prajjwal1/bert-tiny] Epoch 3/3 | Train Loss: 1.0035 | Val Accuracy: 0.5696
=== Now training: model=prajjwal1/bert-tiny, optimizer=adam, lr=5e-05, epochs=1, seed=42 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/1 | Train Loss: 1.0487 | Val Accuracy: 0.4567
=== Now training: model=prajjwal1/bert-tiny, optimizer=adam, lr=5e-05, epochs=1, seed=1984 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/1 | Train Loss: 1.0656 | Val Accuracy: 0.4567
=== Now training: model=prajjwal1/bert-tiny, optimizer=adam, lr=5e-05, epochs=1, seed=1337 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/1 | Train Loss: 1.0609 | Val Accuracy: 0.4567
=== Now training: model=prajjwal1/bert-tiny, optimizer=adam, lr=5e-05, epochs=2, seed=42 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/2 | Train Loss: 1.0487 | Val Accuracy: 0.4567
[prajjwal1/bert-tiny] Epoch 2/2 | Train Loss: 1.0184 | Val Accuracy: 0.5249
=== Now training: model=prajjwal1/bert-tiny, optimizer=adam, lr=5e-05, epochs=2, seed=1984 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/2 | Train Loss: 1.0656 | Val Accuracy: 0.4567
[prajjwal1/bert-tiny] Epoch 2/2 | Train Loss: 1.0138 | Val Accuracy: 0.5486
=== Now training: model=prajjwal1/bert-tiny, optimizer=adam, lr=5e-05, epochs=2, seed=1337 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/2 | Train Loss: 1.0609 | Val Accuracy: 0.4567
[prajjwal1/bert-tiny] Epoch 2/2 | Train Loss: 1.0164 | Val Accuracy: 0.5827
=== Now training: model=prajjwal1/bert-tiny, optimizer=adam, lr=5e-05, epochs=3, seed=42 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/3 | Train Loss: 1.0487 | Val Accuracy: 0.4567
[prajjwal1/bert-tiny] Epoch 2/3 | Train Loss: 1.0184 | Val Accuracy: 0.5249
[prajjwal1/bert-tiny] Epoch 3/3 | Train Loss: 0.9478 | Val Accuracy: 0.5643
=== Now training: model=prajjwal1/bert-tiny, optimizer=adam, lr=5e-05, epochs=3, seed=1984 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/3 | Train Loss: 1.0656 | Val Accuracy: 0.4567
[prajjwal1/bert-tiny] Epoch 2/3 | Train Loss: 1.0138 | Val Accuracy: 0.5486
[prajjwal1/bert-tiny] Epoch 3/3 | Train Loss: 0.9635 | Val Accuracy: 0.5696
=== Now training: model=prajjwal1/bert-tiny, optimizer=adam, lr=5e-05, epochs=3, seed=1337 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/3 | Train Loss: 1.0609 | Val Accuracy: 0.4567
[prajjwal1/bert-tiny] Epoch 2/3 | Train Loss: 1.0164 | Val Accuracy: 0.5827
[prajjwal1/bert-tiny] Epoch 3/3 | Train Loss: 0.9483 | Val Accuracy: 0.5958
=== Now training: model=prajjwal1/bert-tiny, optimizer=sgd, lr=1e-05, epochs=1, seed=42 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/1 | Train Loss: 1.0792 | Val Accuracy: 0.4199
=== Now training: model=prajjwal1/bert-tiny, optimizer=sgd, lr=1e-05, epochs=1, seed=1984 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/1 | Train Loss: 1.1447 | Val Accuracy: 0.2205
=== Now training: model=prajjwal1/bert-tiny, optimizer=sgd, lr=1e-05, epochs=1, seed=1337 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/1 | Train Loss: 1.0885 | Val Accuracy: 0.4567
=== Now training: model=prajjwal1/bert-tiny, optimizer=sgd, lr=1e-05, epochs=2, seed=42 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/2 | Train Loss: 1.0792 | Val Accuracy: 0.4199
[prajjwal1/bert-tiny] Epoch 2/2 | Train Loss: 1.0795 | Val Accuracy: 0.4278
=== Now training: model=prajjwal1/bert-tiny, optimizer=sgd, lr=1e-05, epochs=2, seed=1984 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/2 | Train Loss: 1.1447 | Val Accuracy: 0.2205
[prajjwal1/bert-tiny] Epoch 2/2 | Train Loss: 1.1375 | Val Accuracy: 0.2178
=== Now training: model=prajjwal1/bert-tiny, optimizer=sgd, lr=1e-05, epochs=2, seed=1337 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/2 | Train Loss: 1.0885 | Val Accuracy: 0.4567
[prajjwal1/bert-tiny] Epoch 2/2 | Train Loss: 1.0880 | Val Accuracy: 0.4567
=== Now training: model=prajjwal1/bert-tiny, optimizer=sgd, lr=1e-05, epochs=3, seed=42 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/3 | Train Loss: 1.0792 | Val Accuracy: 0.4199
[prajjwal1/bert-tiny] Epoch 2/3 | Train Loss: 1.0795 | Val Accuracy: 0.4278
[prajjwal1/bert-tiny] Epoch 3/3 | Train Loss: 1.0781 | Val Accuracy: 0.4357
=== Now training: model=prajjwal1/bert-tiny, optimizer=sgd, lr=1e-05, epochs=3, seed=1984 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/3 | Train Loss: 1.1447 | Val Accuracy: 0.2205
[prajjwal1/bert-tiny] Epoch 2/3 | Train Loss: 1.1375 | Val Accuracy: 0.2178
[prajjwal1/bert-tiny] Epoch 3/3 | Train Loss: 1.1365 | Val Accuracy: 0.2257
=== Now training: model=prajjwal1/bert-tiny, optimizer=sgd, lr=1e-05, epochs=3, seed=1337 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/3 | Train Loss: 1.0885 | Val Accuracy: 0.4567
[prajjwal1/bert-tiny] Epoch 2/3 | Train Loss: 1.0880 | Val Accuracy: 0.4567
[prajjwal1/bert-tiny] Epoch 3/3 | Train Loss: 1.0848 | Val Accuracy: 0.4567
=== Now training: model=prajjwal1/bert-tiny, optimizer=sgd, lr=3e-05, epochs=1, seed=42 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/1 | Train Loss: 1.0781 | Val Accuracy: 0.4357
=== Now training: model=prajjwal1/bert-tiny, optimizer=sgd, lr=3e-05, epochs=1, seed=1984 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/1 | Train Loss: 1.1406 | Val Accuracy: 0.2257
=== Now training: model=prajjwal1/bert-tiny, optimizer=sgd, lr=3e-05, epochs=1, seed=1337 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/1 | Train Loss: 1.0879 | Val Accuracy: 0.4567
=== Now training: model=prajjwal1/bert-tiny, optimizer=sgd, lr=3e-05, epochs=2, seed=42 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/2 | Train Loss: 1.0781 | Val Accuracy: 0.4357
[prajjwal1/bert-tiny] Epoch 2/2 | Train Loss: 1.0768 | Val Accuracy: 0.4304
=== Now training: model=prajjwal1/bert-tiny, optimizer=sgd, lr=3e-05, epochs=2, seed=1984 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/2 | Train Loss: 1.1406 | Val Accuracy: 0.2257
[prajjwal1/bert-tiny] Epoch 2/2 | Train Loss: 1.1261 | Val Accuracy: 0.2598
=== Now training: model=prajjwal1/bert-tiny, optimizer=sgd, lr=3e-05, epochs=2, seed=1337 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/2 | Train Loss: 1.0879 | Val Accuracy: 0.4567
[prajjwal1/bert-tiny] Epoch 2/2 | Train Loss: 1.0859 | Val Accuracy: 0.4567
=== Now training: model=prajjwal1/bert-tiny, optimizer=sgd, lr=3e-05, epochs=3, seed=42 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/3 | Train Loss: 1.0781 | Val Accuracy: 0.4357
[prajjwal1/bert-tiny] Epoch 2/3 | Train Loss: 1.0768 | Val Accuracy: 0.4304
[prajjwal1/bert-tiny] Epoch 3/3 | Train Loss: 1.0737 | Val Accuracy: 0.4357
=== Now training: model=prajjwal1/bert-tiny, optimizer=sgd, lr=3e-05, epochs=3, seed=1984 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/3 | Train Loss: 1.1406 | Val Accuracy: 0.2257
[prajjwal1/bert-tiny] Epoch 2/3 | Train Loss: 1.1261 | Val Accuracy: 0.2598
[prajjwal1/bert-tiny] Epoch 3/3 | Train Loss: 1.1196 | Val Accuracy: 0.2992
=== Now training: model=prajjwal1/bert-tiny, optimizer=sgd, lr=3e-05, epochs=3, seed=1337 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/3 | Train Loss: 1.0879 | Val Accuracy: 0.4567
[prajjwal1/bert-tiny] Epoch 2/3 | Train Loss: 1.0859 | Val Accuracy: 0.4567
[prajjwal1/bert-tiny] Epoch 3/3 | Train Loss: 1.0814 | Val Accuracy: 0.4567
=== Now training: model=prajjwal1/bert-tiny, optimizer=sgd, lr=5e-05, epochs=1, seed=42 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/1 | Train Loss: 1.0772 | Val Accuracy: 0.4331
=== Now training: model=prajjwal1/bert-tiny, optimizer=sgd, lr=5e-05, epochs=1, seed=1984 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/1 | Train Loss: 1.1368 | Val Accuracy: 0.2493
=== Now training: model=prajjwal1/bert-tiny, optimizer=sgd, lr=5e-05, epochs=1, seed=1337 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/1 | Train Loss: 1.0872 | Val Accuracy: 0.4567
=== Now training: model=prajjwal1/bert-tiny, optimizer=sgd, lr=5e-05, epochs=2, seed=42 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/2 | Train Loss: 1.0772 | Val Accuracy: 0.4331
[prajjwal1/bert-tiny] Epoch 2/2 | Train Loss: 1.0746 | Val Accuracy: 0.4357
=== Now training: model=prajjwal1/bert-tiny, optimizer=sgd, lr=5e-05, epochs=2, seed=1984 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/2 | Train Loss: 1.1368 | Val Accuracy: 0.2493
[prajjwal1/bert-tiny] Epoch 2/2 | Train Loss: 1.1163 | Val Accuracy: 0.3018
=== Now training: model=prajjwal1/bert-tiny, optimizer=sgd, lr=5e-05, epochs=2, seed=1337 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/2 | Train Loss: 1.0872 | Val Accuracy: 0.4567
[prajjwal1/bert-tiny] Epoch 2/2 | Train Loss: 1.0840 | Val Accuracy: 0.4567
=== Now training: model=prajjwal1/bert-tiny, optimizer=sgd, lr=5e-05, epochs=3, seed=42 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/3 | Train Loss: 1.0772 | Val Accuracy: 0.4331
[prajjwal1/bert-tiny] Epoch 2/3 | Train Loss: 1.0746 | Val Accuracy: 0.4357
[prajjwal1/bert-tiny] Epoch 3/3 | Train Loss: 1.0702 | Val Accuracy: 0.4436
=== Now training: model=prajjwal1/bert-tiny, optimizer=sgd, lr=5e-05, epochs=3, seed=1984 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/3 | Train Loss: 1.1368 | Val Accuracy: 0.2493
[prajjwal1/bert-tiny] Epoch 2/3 | Train Loss: 1.1163 | Val Accuracy: 0.3018
[prajjwal1/bert-tiny] Epoch 3/3 | Train Loss: 1.1065 | Val Accuracy: 0.3648
=== Now training: model=prajjwal1/bert-tiny, optimizer=sgd, lr=5e-05, epochs=3, seed=1337 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[prajjwal1/bert-tiny] Epoch 1/3 | Train Loss: 1.0872 | Val Accuracy: 0.4567
[prajjwal1/bert-tiny] Epoch 2/3 | Train Loss: 1.0840 | Val Accuracy: 0.4567
[prajjwal1/bert-tiny] Epoch 3/3 | Train Loss: 1.0787 | Val Accuracy: 0.4567
=== Now training: model=vinai/bertweet-base, optimizer=adam, lr=1e-05, epochs=1, seed=42 ===


config.json:   0%|          | 0.00/558 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/843k [00:00<?, ?B/s]

bpe.codes:   0%|          | 0.00/1.08M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.91M [00:00<?, ?B/s]

emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0


pytorch_model.bin:   0%|          | 0.00/543M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/1 | Train Loss: 0.9940 | Val Accuracy: 0.6325
=== Now training: model=vinai/bertweet-base, optimizer=adam, lr=1e-05, epochs=1, seed=1984 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/1 | Train Loss: 1.0207 | Val Accuracy: 0.6063
=== Now training: model=vinai/bertweet-base, optimizer=adam, lr=1e-05, epochs=1, seed=1337 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/1 | Train Loss: 1.0166 | Val Accuracy: 0.6168
=== Now training: model=vinai/bertweet-base, optimizer=adam, lr=1e-05, epochs=2, seed=42 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/2 | Train Loss: 0.9940 | Val Accuracy: 0.6325
[vinai/bertweet-base] Epoch 2/2 | Train Loss: 0.8295 | Val Accuracy: 0.6745
=== Now training: model=vinai/bertweet-base, optimizer=adam, lr=1e-05, epochs=2, seed=1984 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/2 | Train Loss: 1.0207 | Val Accuracy: 0.6063
[vinai/bertweet-base] Epoch 2/2 | Train Loss: 0.8740 | Val Accuracy: 0.6719
=== Now training: model=vinai/bertweet-base, optimizer=adam, lr=1e-05, epochs=2, seed=1337 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/2 | Train Loss: 1.0166 | Val Accuracy: 0.6168
[vinai/bertweet-base] Epoch 2/2 | Train Loss: 0.8115 | Val Accuracy: 0.7087
=== Now training: model=vinai/bertweet-base, optimizer=adam, lr=1e-05, epochs=3, seed=42 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/3 | Train Loss: 0.9940 | Val Accuracy: 0.6325
[vinai/bertweet-base] Epoch 2/3 | Train Loss: 0.8295 | Val Accuracy: 0.6745
[vinai/bertweet-base] Epoch 3/3 | Train Loss: 0.7539 | Val Accuracy: 0.6955
=== Now training: model=vinai/bertweet-base, optimizer=adam, lr=1e-05, epochs=3, seed=1984 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/3 | Train Loss: 1.0207 | Val Accuracy: 0.6063
[vinai/bertweet-base] Epoch 2/3 | Train Loss: 0.8740 | Val Accuracy: 0.6719
[vinai/bertweet-base] Epoch 3/3 | Train Loss: 0.6782 | Val Accuracy: 0.7139
=== Now training: model=vinai/bertweet-base, optimizer=adam, lr=1e-05, epochs=3, seed=1337 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/3 | Train Loss: 1.0166 | Val Accuracy: 0.6168
[vinai/bertweet-base] Epoch 2/3 | Train Loss: 0.8115 | Val Accuracy: 0.7087
[vinai/bertweet-base] Epoch 3/3 | Train Loss: 0.6181 | Val Accuracy: 0.7244
=== Now training: model=vinai/bertweet-base, optimizer=adam, lr=3e-05, epochs=1, seed=42 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/1 | Train Loss: 0.9663 | Val Accuracy: 0.6430
=== Now training: model=vinai/bertweet-base, optimizer=adam, lr=3e-05, epochs=1, seed=1984 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/1 | Train Loss: 0.9489 | Val Accuracy: 0.6850
=== Now training: model=vinai/bertweet-base, optimizer=adam, lr=3e-05, epochs=1, seed=1337 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/1 | Train Loss: 0.9183 | Val Accuracy: 0.6640
=== Now training: model=vinai/bertweet-base, optimizer=adam, lr=3e-05, epochs=2, seed=42 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/2 | Train Loss: 0.9663 | Val Accuracy: 0.6430
[vinai/bertweet-base] Epoch 2/2 | Train Loss: 0.7574 | Val Accuracy: 0.6982
=== Now training: model=vinai/bertweet-base, optimizer=adam, lr=3e-05, epochs=2, seed=1984 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/2 | Train Loss: 0.9489 | Val Accuracy: 0.6850
[vinai/bertweet-base] Epoch 2/2 | Train Loss: 0.6763 | Val Accuracy: 0.7349
=== Now training: model=vinai/bertweet-base, optimizer=adam, lr=3e-05, epochs=2, seed=1337 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/2 | Train Loss: 0.9183 | Val Accuracy: 0.6640
[vinai/bertweet-base] Epoch 2/2 | Train Loss: 0.6303 | Val Accuracy: 0.7113
=== Now training: model=vinai/bertweet-base, optimizer=adam, lr=3e-05, epochs=3, seed=42 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/3 | Train Loss: 0.9663 | Val Accuracy: 0.6430
[vinai/bertweet-base] Epoch 2/3 | Train Loss: 0.7574 | Val Accuracy: 0.6982
[vinai/bertweet-base] Epoch 3/3 | Train Loss: 0.5447 | Val Accuracy: 0.7428
=== Now training: model=vinai/bertweet-base, optimizer=adam, lr=3e-05, epochs=3, seed=1984 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/3 | Train Loss: 0.9489 | Val Accuracy: 0.6850
[vinai/bertweet-base] Epoch 2/3 | Train Loss: 0.6763 | Val Accuracy: 0.7349
[vinai/bertweet-base] Epoch 3/3 | Train Loss: 0.4429 | Val Accuracy: 0.7218
=== Now training: model=vinai/bertweet-base, optimizer=adam, lr=3e-05, epochs=3, seed=1337 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/3 | Train Loss: 0.9183 | Val Accuracy: 0.6640
[vinai/bertweet-base] Epoch 2/3 | Train Loss: 0.6303 | Val Accuracy: 0.7113
[vinai/bertweet-base] Epoch 3/3 | Train Loss: 0.3888 | Val Accuracy: 0.7244
=== Now training: model=vinai/bertweet-base, optimizer=adam, lr=5e-05, epochs=1, seed=42 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/1 | Train Loss: 0.9567 | Val Accuracy: 0.6772
=== Now training: model=vinai/bertweet-base, optimizer=adam, lr=5e-05, epochs=1, seed=1984 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/1 | Train Loss: 0.9394 | Val Accuracy: 0.5932
=== Now training: model=vinai/bertweet-base, optimizer=adam, lr=5e-05, epochs=1, seed=1337 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/1 | Train Loss: 0.8846 | Val Accuracy: 0.6089
=== Now training: model=vinai/bertweet-base, optimizer=adam, lr=5e-05, epochs=2, seed=42 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/2 | Train Loss: 0.9567 | Val Accuracy: 0.6772
[vinai/bertweet-base] Epoch 2/2 | Train Loss: 0.6892 | Val Accuracy: 0.7323
=== Now training: model=vinai/bertweet-base, optimizer=adam, lr=5e-05, epochs=2, seed=1984 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/2 | Train Loss: 0.9394 | Val Accuracy: 0.5932
[vinai/bertweet-base] Epoch 2/2 | Train Loss: 0.7035 | Val Accuracy: 0.6772
=== Now training: model=vinai/bertweet-base, optimizer=adam, lr=5e-05, epochs=2, seed=1337 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/2 | Train Loss: 0.8846 | Val Accuracy: 0.6089
[vinai/bertweet-base] Epoch 2/2 | Train Loss: 0.5949 | Val Accuracy: 0.7559
=== Now training: model=vinai/bertweet-base, optimizer=adam, lr=5e-05, epochs=3, seed=42 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/3 | Train Loss: 0.9567 | Val Accuracy: 0.6772
[vinai/bertweet-base] Epoch 2/3 | Train Loss: 0.6892 | Val Accuracy: 0.7323
[vinai/bertweet-base] Epoch 3/3 | Train Loss: 0.4292 | Val Accuracy: 0.7375
=== Now training: model=vinai/bertweet-base, optimizer=adam, lr=5e-05, epochs=3, seed=1984 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/3 | Train Loss: 0.9394 | Val Accuracy: 0.5932
[vinai/bertweet-base] Epoch 2/3 | Train Loss: 0.7035 | Val Accuracy: 0.6772
[vinai/bertweet-base] Epoch 3/3 | Train Loss: 0.8449 | Val Accuracy: 0.4619
=== Now training: model=vinai/bertweet-base, optimizer=adam, lr=5e-05, epochs=3, seed=1337 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/3 | Train Loss: 0.8846 | Val Accuracy: 0.6089
[vinai/bertweet-base] Epoch 2/3 | Train Loss: 0.5949 | Val Accuracy: 0.7559
[vinai/bertweet-base] Epoch 3/3 | Train Loss: 0.3535 | Val Accuracy: 0.7402
=== Now training: model=vinai/bertweet-base, optimizer=sgd, lr=1e-05, epochs=1, seed=42 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/1 | Train Loss: 1.0993 | Val Accuracy: 0.3701
=== Now training: model=vinai/bertweet-base, optimizer=sgd, lr=1e-05, epochs=1, seed=1984 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/1 | Train Loss: 1.0942 | Val Accuracy: 0.4514
=== Now training: model=vinai/bertweet-base, optimizer=sgd, lr=1e-05, epochs=1, seed=1337 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/1 | Train Loss: 1.1004 | Val Accuracy: 0.3255
=== Now training: model=vinai/bertweet-base, optimizer=sgd, lr=1e-05, epochs=2, seed=42 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/2 | Train Loss: 1.0993 | Val Accuracy: 0.3701
[vinai/bertweet-base] Epoch 2/2 | Train Loss: 1.1001 | Val Accuracy: 0.3832
=== Now training: model=vinai/bertweet-base, optimizer=sgd, lr=1e-05, epochs=2, seed=1984 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/2 | Train Loss: 1.0942 | Val Accuracy: 0.4514
[vinai/bertweet-base] Epoch 2/2 | Train Loss: 1.0922 | Val Accuracy: 0.4514
=== Now training: model=vinai/bertweet-base, optimizer=sgd, lr=1e-05, epochs=2, seed=1337 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/2 | Train Loss: 1.1004 | Val Accuracy: 0.3255
[vinai/bertweet-base] Epoch 2/2 | Train Loss: 1.1013 | Val Accuracy: 0.3255
=== Now training: model=vinai/bertweet-base, optimizer=sgd, lr=1e-05, epochs=3, seed=42 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/3 | Train Loss: 1.0993 | Val Accuracy: 0.3701
[vinai/bertweet-base] Epoch 2/3 | Train Loss: 1.1001 | Val Accuracy: 0.3832
[vinai/bertweet-base] Epoch 3/3 | Train Loss: 1.0970 | Val Accuracy: 0.4068
=== Now training: model=vinai/bertweet-base, optimizer=sgd, lr=1e-05, epochs=3, seed=1984 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/3 | Train Loss: 1.0942 | Val Accuracy: 0.4514
[vinai/bertweet-base] Epoch 2/3 | Train Loss: 1.0922 | Val Accuracy: 0.4514
[vinai/bertweet-base] Epoch 3/3 | Train Loss: 1.0931 | Val Accuracy: 0.4436
=== Now training: model=vinai/bertweet-base, optimizer=sgd, lr=1e-05, epochs=3, seed=1337 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/3 | Train Loss: 1.1004 | Val Accuracy: 0.3255
[vinai/bertweet-base] Epoch 2/3 | Train Loss: 1.1013 | Val Accuracy: 0.3255
[vinai/bertweet-base] Epoch 3/3 | Train Loss: 1.0989 | Val Accuracy: 0.3255
=== Now training: model=vinai/bertweet-base, optimizer=sgd, lr=3e-05, epochs=1, seed=42 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/1 | Train Loss: 1.0976 | Val Accuracy: 0.4042
=== Now training: model=vinai/bertweet-base, optimizer=sgd, lr=3e-05, epochs=1, seed=1984 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/1 | Train Loss: 1.0931 | Val Accuracy: 0.4436
=== Now training: model=vinai/bertweet-base, optimizer=sgd, lr=3e-05, epochs=1, seed=1337 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/1 | Train Loss: 1.0989 | Val Accuracy: 0.3255
=== Now training: model=vinai/bertweet-base, optimizer=sgd, lr=3e-05, epochs=2, seed=42 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/2 | Train Loss: 1.0976 | Val Accuracy: 0.4042
[vinai/bertweet-base] Epoch 2/2 | Train Loss: 1.0956 | Val Accuracy: 0.4357
=== Now training: model=vinai/bertweet-base, optimizer=sgd, lr=3e-05, epochs=2, seed=1984 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/2 | Train Loss: 1.0931 | Val Accuracy: 0.4436
[vinai/bertweet-base] Epoch 2/2 | Train Loss: 1.0891 | Val Accuracy: 0.4541
=== Now training: model=vinai/bertweet-base, optimizer=sgd, lr=3e-05, epochs=2, seed=1337 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/2 | Train Loss: 1.0989 | Val Accuracy: 0.3255
[vinai/bertweet-base] Epoch 2/2 | Train Loss: 1.0968 | Val Accuracy: 0.3176
=== Now training: model=vinai/bertweet-base, optimizer=sgd, lr=3e-05, epochs=3, seed=42 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/3 | Train Loss: 1.0976 | Val Accuracy: 0.4042
[vinai/bertweet-base] Epoch 2/3 | Train Loss: 1.0956 | Val Accuracy: 0.4357
[vinai/bertweet-base] Epoch 3/3 | Train Loss: 1.0898 | Val Accuracy: 0.4672
=== Now training: model=vinai/bertweet-base, optimizer=sgd, lr=3e-05, epochs=3, seed=1984 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/3 | Train Loss: 1.0931 | Val Accuracy: 0.4436
[vinai/bertweet-base] Epoch 2/3 | Train Loss: 1.0891 | Val Accuracy: 0.4541
[vinai/bertweet-base] Epoch 3/3 | Train Loss: 1.0881 | Val Accuracy: 0.4567
=== Now training: model=vinai/bertweet-base, optimizer=sgd, lr=3e-05, epochs=3, seed=1337 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/3 | Train Loss: 1.0989 | Val Accuracy: 0.3255
[vinai/bertweet-base] Epoch 2/3 | Train Loss: 1.0968 | Val Accuracy: 0.3176
[vinai/bertweet-base] Epoch 3/3 | Train Loss: 1.0922 | Val Accuracy: 0.3150
=== Now training: model=vinai/bertweet-base, optimizer=sgd, lr=5e-05, epochs=1, seed=42 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/1 | Train Loss: 1.0961 | Val Accuracy: 0.4462
=== Now training: model=vinai/bertweet-base, optimizer=sgd, lr=5e-05, epochs=1, seed=1984 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/1 | Train Loss: 1.0920 | Val Accuracy: 0.4514
=== Now training: model=vinai/bertweet-base, optimizer=sgd, lr=5e-05, epochs=1, seed=1337 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/1 | Train Loss: 1.0975 | Val Accuracy: 0.3176
=== Now training: model=vinai/bertweet-base, optimizer=sgd, lr=5e-05, epochs=2, seed=42 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/2 | Train Loss: 1.0961 | Val Accuracy: 0.4462
[vinai/bertweet-base] Epoch 2/2 | Train Loss: 1.0916 | Val Accuracy: 0.4724
=== Now training: model=vinai/bertweet-base, optimizer=sgd, lr=5e-05, epochs=2, seed=1984 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/2 | Train Loss: 1.0920 | Val Accuracy: 0.4514
[vinai/bertweet-base] Epoch 2/2 | Train Loss: 1.0864 | Val Accuracy: 0.4567
=== Now training: model=vinai/bertweet-base, optimizer=sgd, lr=5e-05, epochs=2, seed=1337 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/2 | Train Loss: 1.0975 | Val Accuracy: 0.3176
[vinai/bertweet-base] Epoch 2/2 | Train Loss: 1.0928 | Val Accuracy: 0.3097
=== Now training: model=vinai/bertweet-base, optimizer=sgd, lr=5e-05, epochs=3, seed=42 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/3 | Train Loss: 1.0961 | Val Accuracy: 0.4462
[vinai/bertweet-base] Epoch 2/3 | Train Loss: 1.0916 | Val Accuracy: 0.4724
[vinai/bertweet-base] Epoch 3/3 | Train Loss: 1.0838 | Val Accuracy: 0.4698
=== Now training: model=vinai/bertweet-base, optimizer=sgd, lr=5e-05, epochs=3, seed=1984 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/3 | Train Loss: 1.0920 | Val Accuracy: 0.4514
[vinai/bertweet-base] Epoch 2/3 | Train Loss: 1.0864 | Val Accuracy: 0.4567
[vinai/bertweet-base] Epoch 3/3 | Train Loss: 1.0839 | Val Accuracy: 0.4567
=== Now training: model=vinai/bertweet-base, optimizer=sgd, lr=5e-05, epochs=3, seed=1337 ===


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[vinai/bertweet-base] Epoch 1/3 | Train Loss: 1.0975 | Val Accuracy: 0.3176
[vinai/bertweet-base] Epoch 2/3 | Train Loss: 1.0928 | Val Accuracy: 0.3097
[vinai/bertweet-base] Epoch 3/3 | Train Loss: 1.0866 | Val Accuracy: 0.3832
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=adam, lr=1e-05, epochs=1, seed=42 ===


config.json:   0%|          | 0.00/929 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/501M [00:00<?, ?B/s]

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/1 | Train Loss: 1.0394 | Val Accuracy: 0.6063
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=adam, lr=1e-05, epochs=1, seed=1984 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/1 | Train Loss: 1.0362 | Val Accuracy: 0.6037
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=adam, lr=1e-05, epochs=1, seed=1337 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/1 | Train Loss: 1.0625 | Val Accuracy: 0.6010
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=adam, lr=1e-05, epochs=2, seed=42 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/2 | Train Loss: 1.0394 | Val Accuracy: 0.6063
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/2 | Train Loss: 0.8219 | Val Accuracy: 0.6325
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=adam, lr=1e-05, epochs=2, seed=1984 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/2 | Train Loss: 1.0362 | Val Accuracy: 0.6037
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/2 | Train Loss: 0.7867 | Val Accuracy: 0.6798
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=adam, lr=1e-05, epochs=2, seed=1337 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/2 | Train Loss: 1.0625 | Val Accuracy: 0.6010
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/2 | Train Loss: 0.8083 | Val Accuracy: 0.6562
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=adam, lr=1e-05, epochs=3, seed=42 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 1.0394 | Val Accuracy: 0.6063
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.8219 | Val Accuracy: 0.6325
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.6292 | Val Accuracy: 0.6929
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=adam, lr=1e-05, epochs=3, seed=1984 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 1.0362 | Val Accuracy: 0.6037
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.7867 | Val Accuracy: 0.6798
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.5773 | Val Accuracy: 0.7349
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=adam, lr=1e-05, epochs=3, seed=1337 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 1.0625 | Val Accuracy: 0.6010
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.8083 | Val Accuracy: 0.6562
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.5890 | Val Accuracy: 0.7480
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=adam, lr=3e-05, epochs=1, seed=42 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/1 | Train Loss: 0.9638 | Val Accuracy: 0.6115
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=adam, lr=3e-05, epochs=1, seed=1984 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/1 | Train Loss: 0.9480 | Val Accuracy: 0.5801
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=adam, lr=3e-05, epochs=1, seed=1337 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/1 | Train Loss: 0.9939 | Val Accuracy: 0.6247
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=adam, lr=3e-05, epochs=2, seed=42 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/2 | Train Loss: 0.9638 | Val Accuracy: 0.6115
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/2 | Train Loss: 0.7017 | Val Accuracy: 0.6457
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=adam, lr=3e-05, epochs=2, seed=1984 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/2 | Train Loss: 0.9480 | Val Accuracy: 0.5801
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/2 | Train Loss: 0.6447 | Val Accuracy: 0.6745
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=adam, lr=3e-05, epochs=2, seed=1337 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/2 | Train Loss: 0.9939 | Val Accuracy: 0.6247
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/2 | Train Loss: 0.6613 | Val Accuracy: 0.7585
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=adam, lr=3e-05, epochs=3, seed=42 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9638 | Val Accuracy: 0.6115
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.7017 | Val Accuracy: 0.6457
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.4275 | Val Accuracy: 0.7402
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=adam, lr=3e-05, epochs=3, seed=1984 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9480 | Val Accuracy: 0.5801
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6447 | Val Accuracy: 0.6745
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3635 | Val Accuracy: 0.7323
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=adam, lr=3e-05, epochs=3, seed=1337 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9939 | Val Accuracy: 0.6247
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6613 | Val Accuracy: 0.7585
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3314 | Val Accuracy: 0.7480
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=adam, lr=5e-05, epochs=1, seed=42 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/1 | Train Loss: 0.9711 | Val Accuracy: 0.6220
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=adam, lr=5e-05, epochs=1, seed=1984 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/1 | Train Loss: 0.9232 | Val Accuracy: 0.6168
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=adam, lr=5e-05, epochs=1, seed=1337 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/1 | Train Loss: 0.9721 | Val Accuracy: 0.6693
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=adam, lr=5e-05, epochs=2, seed=42 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/2 | Train Loss: 0.9711 | Val Accuracy: 0.6220
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/2 | Train Loss: 0.6755 | Val Accuracy: 0.6562
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=adam, lr=5e-05, epochs=2, seed=1984 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/2 | Train Loss: 0.9232 | Val Accuracy: 0.6168
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/2 | Train Loss: 0.6006 | Val Accuracy: 0.6824
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=adam, lr=5e-05, epochs=2, seed=1337 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/2 | Train Loss: 0.9721 | Val Accuracy: 0.6693
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/2 | Train Loss: 0.5816 | Val Accuracy: 0.7375
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=adam, lr=5e-05, epochs=3, seed=42 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9711 | Val Accuracy: 0.6220
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6755 | Val Accuracy: 0.6562
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3752 | Val Accuracy: 0.7192
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=adam, lr=5e-05, epochs=3, seed=1984 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9232 | Val Accuracy: 0.6168
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6006 | Val Accuracy: 0.6824
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3115 | Val Accuracy: 0.7612
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=adam, lr=5e-05, epochs=3, seed=1337 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9721 | Val Accuracy: 0.6693
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.5816 | Val Accuracy: 0.7375
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.2956 | Val Accuracy: 0.7454
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=sgd, lr=1e-05, epochs=1, seed=42 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/1 | Train Loss: 1.8513 | Val Accuracy: 0.3491
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=sgd, lr=1e-05, epochs=1, seed=1984 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/1 | Train Loss: 1.8738 | Val Accuracy: 0.3570
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=sgd, lr=1e-05, epochs=1, seed=1337 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/1 | Train Loss: 1.8984 | Val Accuracy: 0.3386
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=sgd, lr=1e-05, epochs=2, seed=42 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/2 | Train Loss: 1.8513 | Val Accuracy: 0.3491
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/2 | Train Loss: 1.5579 | Val Accuracy: 0.3780
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=sgd, lr=1e-05, epochs=2, seed=1984 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/2 | Train Loss: 1.8738 | Val Accuracy: 0.3570
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/2 | Train Loss: 1.5696 | Val Accuracy: 0.3832
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=sgd, lr=1e-05, epochs=2, seed=1337 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/2 | Train Loss: 1.8984 | Val Accuracy: 0.3386
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/2 | Train Loss: 1.5638 | Val Accuracy: 0.3727
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=sgd, lr=1e-05, epochs=3, seed=42 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 1.8513 | Val Accuracy: 0.3491
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 1.5579 | Val Accuracy: 0.3780
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 1.4319 | Val Accuracy: 0.3832
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=sgd, lr=1e-05, epochs=3, seed=1984 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 1.8738 | Val Accuracy: 0.3570
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 1.5696 | Val Accuracy: 0.3832
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 1.4596 | Val Accuracy: 0.3911
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=sgd, lr=1e-05, epochs=3, seed=1337 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 1.8984 | Val Accuracy: 0.3386
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 1.5638 | Val Accuracy: 0.3727
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 1.4420 | Val Accuracy: 0.3675
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=sgd, lr=3e-05, epochs=1, seed=42 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/1 | Train Loss: 1.6153 | Val Accuracy: 0.3780
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=sgd, lr=3e-05, epochs=1, seed=1984 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/1 | Train Loss: 1.6331 | Val Accuracy: 0.3911
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=sgd, lr=3e-05, epochs=1, seed=1337 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/1 | Train Loss: 1.6506 | Val Accuracy: 0.3701
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=sgd, lr=3e-05, epochs=2, seed=42 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/2 | Train Loss: 1.6153 | Val Accuracy: 0.3780
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/2 | Train Loss: 1.3157 | Val Accuracy: 0.3675
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=sgd, lr=3e-05, epochs=2, seed=1984 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/2 | Train Loss: 1.6331 | Val Accuracy: 0.3911
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/2 | Train Loss: 1.3178 | Val Accuracy: 0.3937
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=sgd, lr=3e-05, epochs=2, seed=1337 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/2 | Train Loss: 1.6506 | Val Accuracy: 0.3701
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/2 | Train Loss: 1.3156 | Val Accuracy: 0.3806
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=sgd, lr=3e-05, epochs=3, seed=42 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 1.6153 | Val Accuracy: 0.3780
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 1.3157 | Val Accuracy: 0.3675
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 1.2003 | Val Accuracy: 0.3885
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=sgd, lr=3e-05, epochs=3, seed=1984 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 1.6331 | Val Accuracy: 0.3911
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 1.3178 | Val Accuracy: 0.3937
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 1.2072 | Val Accuracy: 0.3937
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=sgd, lr=3e-05, epochs=3, seed=1337 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 1.6506 | Val Accuracy: 0.3701
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 1.3156 | Val Accuracy: 0.3806
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 1.1994 | Val Accuracy: 0.4042
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=sgd, lr=5e-05, epochs=1, seed=42 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/1 | Train Loss: 1.5058 | Val Accuracy: 0.3675
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=sgd, lr=5e-05, epochs=1, seed=1984 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/1 | Train Loss: 1.5202 | Val Accuracy: 0.3937
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=sgd, lr=5e-05, epochs=1, seed=1337 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/1 | Train Loss: 1.5323 | Val Accuracy: 0.3806
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=sgd, lr=5e-05, epochs=2, seed=42 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/2 | Train Loss: 1.5058 | Val Accuracy: 0.3675
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/2 | Train Loss: 1.2058 | Val Accuracy: 0.3753
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=sgd, lr=5e-05, epochs=2, seed=1984 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/2 | Train Loss: 1.5202 | Val Accuracy: 0.3937
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/2 | Train Loss: 1.2082 | Val Accuracy: 0.3858
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=sgd, lr=5e-05, epochs=2, seed=1337 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/2 | Train Loss: 1.5323 | Val Accuracy: 0.3806
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/2 | Train Loss: 1.2026 | Val Accuracy: 0.3780
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=sgd, lr=5e-05, epochs=3, seed=42 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 1.5058 | Val Accuracy: 0.3675
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 1.2058 | Val Accuracy: 0.3753
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 1.1258 | Val Accuracy: 0.3990
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=sgd, lr=5e-05, epochs=3, seed=1984 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 1.5202 | Val Accuracy: 0.3937
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 1.2082 | Val Accuracy: 0.3858
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 1.1262 | Val Accuracy: 0.3937
=== Now training: model=cardiffnlp/twitter-roberta-base-sentiment-latest, optimizer=sgd, lr=5e-05, epochs=3, seed=1337 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 1.5323 | Val Accuracy: 0.3806
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 1.2026 | Val Accuracy: 0.3780
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 1.1209 | Val Accuracy: 0.4042

All results:
{'model': 'ProsusAI/finbert', 'optimizer': 'adam', 'learning_rate': 1e-05, 'epochs': 1, 'seed': 42, 'accuracy': 0.5249343832020997}
{'model': 'ProsusAI/finbert', 'optimizer': 'adam', 'learning_rate': 1e-05, 'epochs': 1, 'seed': 1984, 'accuracy': 0.5328083989501312}
{'model': 'ProsusAI/finbert', 'optimizer': 'adam', 'learning_rate': 1e-05, 'epochs': 1, 'seed': 1337, 'accuracy': 0.4776902887139108}
{'model': 'ProsusAI/finbert', 'optimizer': 'adam', 'learning_rate': 1e-05, 'epochs': 2, 'seed': 42, 'accuracy': 0.5538057742782152}
{'model': 'ProsusAI/finbert', 'optimizer': 'adam', 'learning_rate': 1e-05, 'epochs': 2, 'seed': 1984, 'accuracy': 0.5590551181102362}
{'mod

In [None]:
def find_best_seed_for_fixed_params(
    model_name,
    optimizer_name,
    learning_rate,
    num_epochs,
    df_data,
    n_seeds=100,
    seed_range=(0, 9999),
    sampling_seed=None,
):
    """
    Train one fixed configuration under many seeds and report the best one.

    Draws `n_seeds` distinct integers from the inclusive interval
    `seed_range`, calls `train_and_evaluate` once per drawn seed with
    otherwise identical arguments, and tracks the seed whose returned
    accuracy is highest (the first one wins on ties).

    Args:
        model_name: Forwarded unchanged to `train_and_evaluate`.
        optimizer_name: Forwarded unchanged to `train_and_evaluate`.
        learning_rate: Forwarded unchanged to `train_and_evaluate`.
        num_epochs: Forwarded unchanged to `train_and_evaluate`.
        df_data: Forwarded unchanged to `train_and_evaluate` as `df_data`.
        n_seeds: How many distinct seeds to try.
        seed_range: `(low, high)` pair; both ends are included.
        sampling_seed: Optional seed for a private `random.Random` that is
            used only to draw the candidate seeds. With the default `None`
            the candidates come from the global `random` module state, so
            they depend on whatever seeded that state last; pass an integer
            to make the sweep reproducible across kernel restarts.

    Returns:
        Tuple `(best_seed, best_score, seed_scores)` where `seed_scores` is a
        list of `{"seed": ..., "accuracy": ...}` dicts in the order tried.
        When `n_seeds` is 0 the result is `(None, float('-inf'), [])`.

    Raises:
        ValueError: If `n_seeds` is negative or larger than the number of
            integers available in `seed_range`.

    NOTE(review): the training logs print "Val Accuracy", so the score being
    maximised is presumably a validation metric. Picking the best of N seeds
    on that metric gives an optimistic estimate; confirm the chosen seed on
    held-out data before reporting it.
    """
    low, high = seed_range
    population = range(low, high + 1)

    # Fail early with a message that names the offending arguments instead of
    # relying on the generic error raised deep inside random.sample().
    if n_seeds < 0:
        raise ValueError(f"n_seeds must be non-negative, got {n_seeds}")
    if n_seeds > len(population):
        raise ValueError(
            f"n_seeds={n_seeds} exceeds the {len(population)} distinct seeds "
            f"available in seed_range={seed_range}"
        )

    # A private generator keeps the draw independent of the global RNG state;
    # the module itself is used when no sampling seed was requested so that
    # existing callers see the same behaviour as before.
    rng = random if sampling_seed is None else random.Random(sampling_seed)
    candidate_seeds = rng.sample(population, n_seeds)

    seed_scores = []
    best_score = float('-inf')
    best_seed = None

    for s in candidate_seeds:
        print(f"\n=== Training with seed={s} ===")
        accuracy = train_and_evaluate(
            model_name=model_name,
            optimizer_name=optimizer_name,
            learning_rate=learning_rate,
            num_epochs=num_epochs,
            seed_value=s,
            df_data=df_data
        )
        seed_scores.append({"seed": s, "accuracy": accuracy})

        # Strict comparison: the earliest seed keeps the title on a tie.
        if accuracy > best_score:
            best_score = accuracy
            best_seed = s

    print("\nAll seed results:")
    for item in seed_scores:
        print(item)

    print(f"\nBest seed out of {n_seeds} tested: {best_seed} with accuracy: {best_score:.4f}")
    return best_seed, best_score, seed_scores

# ------------------------------------------------------------------------
# Seed sweep for the winning configuration.
# Model, optimizer, learning rate and epoch count are the combination picked
# from the earlier hyper-parameter search; only the seed varies here.
best_model, best_opt = "cardiffnlp/twitter-roberta-base-sentiment-latest", "adam"
best_lr, best_epochs = 5e-5, 3

seed_sweep_kwargs = {
    "model_name": best_model,
    "optimizer_name": best_opt,
    "learning_rate": best_lr,
    "num_epochs": best_epochs,
    "df_data": df_small,        # the 5% per-class sample built at the top of the notebook
    "n_seeds": 100,             # how many distinct seeds to train with
    "seed_range": (0, 9999),    # inclusive bounds the seeds are drawn from
}

best_seed_over_100, best_acc_over_100, seed_experiments = find_best_seed_for_fixed_params(
    **seed_sweep_kwargs
)



=== Training with seed=7871 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9598 | Val Accuracy: 0.6325
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6763 | Val Accuracy: 0.7192
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3621 | Val Accuracy: 0.7375

=== Training with seed=1026 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9537 | Val Accuracy: 0.5984
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6626 | Val Accuracy: 0.6955
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3773 | Val Accuracy: 0.7192

=== Training with seed=4217 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9614 | Val Accuracy: 0.6588
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6749 | Val Accuracy: 0.7323
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3851 | Val Accuracy: 0.7612

=== Training with seed=3699 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 1.0174 | Val Accuracy: 0.6220
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.7269 | Val Accuracy: 0.7008
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3983 | Val Accuracy: 0.6929

=== Training with seed=8085 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9441 | Val Accuracy: 0.6483
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.7034 | Val Accuracy: 0.7087
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.4008 | Val Accuracy: 0.7165

=== Training with seed=8902 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.8980 | Val Accuracy: 0.6772
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6198 | Val Accuracy: 0.7533
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3066 | Val Accuracy: 0.8005

=== Training with seed=9236 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9778 | Val Accuracy: 0.5958
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.7522 | Val Accuracy: 0.6877
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.5510 | Val Accuracy: 0.7349

=== Training with seed=61 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9683 | Val Accuracy: 0.6325
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6970 | Val Accuracy: 0.6509
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.4049 | Val Accuracy: 0.7165

=== Training with seed=23 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9763 | Val Accuracy: 0.6588
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.7104 | Val Accuracy: 0.7559
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3826 | Val Accuracy: 0.7454

=== Training with seed=6538 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9217 | Val Accuracy: 0.6325
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6622 | Val Accuracy: 0.7375
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3792 | Val Accuracy: 0.7323

=== Training with seed=5631 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9690 | Val Accuracy: 0.6168
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.7325 | Val Accuracy: 0.7165
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.4342 | Val Accuracy: 0.7165

=== Training with seed=2358 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9585 | Val Accuracy: 0.6247
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6524 | Val Accuracy: 0.7533
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3370 | Val Accuracy: 0.7612

=== Training with seed=7897 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 1.0055 | Val Accuracy: 0.6220
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.7170 | Val Accuracy: 0.7244
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3803 | Val Accuracy: 0.6877

=== Training with seed=8534 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9611 | Val Accuracy: 0.6824
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6707 | Val Accuracy: 0.7612
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3626 | Val Accuracy: 0.7664

=== Training with seed=2395 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9420 | Val Accuracy: 0.6588
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6351 | Val Accuracy: 0.7244
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3251 | Val Accuracy: 0.7612

=== Training with seed=1209 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9130 | Val Accuracy: 0.6509
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.5912 | Val Accuracy: 0.7349
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.2539 | Val Accuracy: 0.7428

=== Training with seed=513 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9756 | Val Accuracy: 0.6299
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6774 | Val Accuracy: 0.7270
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3606 | Val Accuracy: 0.7507

=== Training with seed=668 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9539 | Val Accuracy: 0.5643
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.7750 | Val Accuracy: 0.7034
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.4764 | Val Accuracy: 0.7349

=== Training with seed=987 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9137 | Val Accuracy: 0.5564
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6189 | Val Accuracy: 0.6850
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3080 | Val Accuracy: 0.7244

=== Training with seed=3580 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9263 | Val Accuracy: 0.6247
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.7143 | Val Accuracy: 0.7008
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3585 | Val Accuracy: 0.7165

=== Training with seed=2849 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9615 | Val Accuracy: 0.6299
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6802 | Val Accuracy: 0.7507
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.4559 | Val Accuracy: 0.7402

=== Training with seed=5047 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9357 | Val Accuracy: 0.5748
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6837 | Val Accuracy: 0.7323
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3430 | Val Accuracy: 0.7218

=== Training with seed=8289 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9754 | Val Accuracy: 0.5958
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6805 | Val Accuracy: 0.7244
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3580 | Val Accuracy: 0.7244

=== Training with seed=2501 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9700 | Val Accuracy: 0.6483
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.7153 | Val Accuracy: 0.6982
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3774 | Val Accuracy: 0.7087

=== Training with seed=3681 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9578 | Val Accuracy: 0.6378
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6590 | Val Accuracy: 0.7034
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3511 | Val Accuracy: 0.6719

=== Training with seed=3713 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9484 | Val Accuracy: 0.6037
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6833 | Val Accuracy: 0.6667
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3695 | Val Accuracy: 0.7165

=== Training with seed=7472 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9650 | Val Accuracy: 0.6220
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6794 | Val Accuracy: 0.7717
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3447 | Val Accuracy: 0.7480

=== Training with seed=6516 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9496 | Val Accuracy: 0.6273
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6724 | Val Accuracy: 0.7087
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3067 | Val Accuracy: 0.7454

=== Training with seed=6826 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9603 | Val Accuracy: 0.6588
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6503 | Val Accuracy: 0.7402
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3018 | Val Accuracy: 0.7743

=== Training with seed=6827 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9333 | Val Accuracy: 0.6430
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6213 | Val Accuracy: 0.7270
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3188 | Val Accuracy: 0.7034

=== Training with seed=1673 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9481 | Val Accuracy: 0.5984
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.7018 | Val Accuracy: 0.7218
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3894 | Val Accuracy: 0.7585

=== Training with seed=2522 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9386 | Val Accuracy: 0.5669
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6073 | Val Accuracy: 0.7690
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.2829 | Val Accuracy: 0.7612

=== Training with seed=4850 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9187 | Val Accuracy: 0.6457
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6208 | Val Accuracy: 0.7165
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3162 | Val Accuracy: 0.6719

=== Training with seed=6783 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9721 | Val Accuracy: 0.6168
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6561 | Val Accuracy: 0.6693
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3440 | Val Accuracy: 0.6824

=== Training with seed=8056 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9554 | Val Accuracy: 0.6089
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6364 | Val Accuracy: 0.6929
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3100 | Val Accuracy: 0.7244

=== Training with seed=8168 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9513 | Val Accuracy: 0.5223
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6531 | Val Accuracy: 0.7139
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3304 | Val Accuracy: 0.7402

=== Training with seed=9345 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9498 | Val Accuracy: 0.5276
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6371 | Val Accuracy: 0.6929
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3472 | Val Accuracy: 0.6877

=== Training with seed=8065 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9790 | Val Accuracy: 0.6483
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6606 | Val Accuracy: 0.7192
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3453 | Val Accuracy: 0.6982

=== Training with seed=7951 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9634 | Val Accuracy: 0.6325
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.5994 | Val Accuracy: 0.7297
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.2736 | Val Accuracy: 0.7270

=== Training with seed=8029 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9785 | Val Accuracy: 0.6063
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6806 | Val Accuracy: 0.7087
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.4274 | Val Accuracy: 0.7165

=== Training with seed=2384 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9412 | Val Accuracy: 0.6168
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6357 | Val Accuracy: 0.6929
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3239 | Val Accuracy: 0.6903

=== Training with seed=253 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9613 | Val Accuracy: 0.6378
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.7301 | Val Accuracy: 0.6982
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3844 | Val Accuracy: 0.6772

=== Training with seed=4636 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9596 | Val Accuracy: 0.5958
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6383 | Val Accuracy: 0.7165
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.2635 | Val Accuracy: 0.7270

=== Training with seed=592 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9582 | Val Accuracy: 0.6115
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6584 | Val Accuracy: 0.6982
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3464 | Val Accuracy: 0.7218

=== Training with seed=6448 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9667 | Val Accuracy: 0.6194
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6702 | Val Accuracy: 0.6929
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3971 | Val Accuracy: 0.7139

=== Training with seed=7266 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9577 | Val Accuracy: 0.6457
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6683 | Val Accuracy: 0.6352
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3685 | Val Accuracy: 0.7165

=== Training with seed=642 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9537 | Val Accuracy: 0.6509
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6478 | Val Accuracy: 0.7559
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3354 | Val Accuracy: 0.7559

=== Training with seed=4388 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9158 | Val Accuracy: 0.5827
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6101 | Val Accuracy: 0.7060
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3403 | Val Accuracy: 0.6982

=== Training with seed=3887 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9465 | Val Accuracy: 0.6299
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6483 | Val Accuracy: 0.7008
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.2993 | Val Accuracy: 0.7297

=== Training with seed=1815 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9412 | Val Accuracy: 0.7139
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6258 | Val Accuracy: 0.7087
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.2969 | Val Accuracy: 0.7559

=== Training with seed=7777 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9380 | Val Accuracy: 0.6509
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.5383 | Val Accuracy: 0.7480
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.2321 | Val Accuracy: 0.6955

=== Training with seed=5222 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9695 | Val Accuracy: 0.6798
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6206 | Val Accuracy: 0.7402
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.2763 | Val Accuracy: 0.7585

=== Training with seed=4430 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9644 | Val Accuracy: 0.6010
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6516 | Val Accuracy: 0.7008
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3503 | Val Accuracy: 0.7585

=== Training with seed=5120 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9418 | Val Accuracy: 0.6352
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6367 | Val Accuracy: 0.7270
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.2861 | Val Accuracy: 0.7480

=== Training with seed=9834 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 1.0244 | Val Accuracy: 0.6457
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.8487 | Val Accuracy: 0.6955
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.5754 | Val Accuracy: 0.7927

=== Training with seed=9597 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9999 | Val Accuracy: 0.5748
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.7216 | Val Accuracy: 0.6772
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3987 | Val Accuracy: 0.7297

=== Training with seed=1238 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9373 | Val Accuracy: 0.6063
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6453 | Val Accuracy: 0.6824
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3352 | Val Accuracy: 0.7008

=== Training with seed=6011 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9469 | Val Accuracy: 0.5958
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6530 | Val Accuracy: 0.7533
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3270 | Val Accuracy: 0.7270

=== Training with seed=1501 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9713 | Val Accuracy: 0.6089
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6354 | Val Accuracy: 0.6745
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3233 | Val Accuracy: 0.6929

=== Training with seed=3978 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9319 | Val Accuracy: 0.6194
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6187 | Val Accuracy: 0.7113
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3368 | Val Accuracy: 0.7113

=== Training with seed=393 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9911 | Val Accuracy: 0.4751
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.7194 | Val Accuracy: 0.7297
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.4171 | Val Accuracy: 0.7244

=== Training with seed=666 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9491 | Val Accuracy: 0.6299
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.7215 | Val Accuracy: 0.6824
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.4250 | Val Accuracy: 0.7375

=== Training with seed=1719 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 1.0024 | Val Accuracy: 0.6614
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.7224 | Val Accuracy: 0.7192
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3957 | Val Accuracy: 0.7585

=== Training with seed=6421 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9613 | Val Accuracy: 0.5801
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6678 | Val Accuracy: 0.7139
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3492 | Val Accuracy: 0.7323

=== Training with seed=9929 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9539 | Val Accuracy: 0.5879
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6144 | Val Accuracy: 0.7375
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.2996 | Val Accuracy: 0.7270

=== Training with seed=3045 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9622 | Val Accuracy: 0.6483
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.7194 | Val Accuracy: 0.7270
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.4194 | Val Accuracy: 0.7454

=== Training with seed=151 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9350 | Val Accuracy: 0.6299
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6168 | Val Accuracy: 0.7953
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3389 | Val Accuracy: 0.7454

=== Training with seed=3062 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9593 | Val Accuracy: 0.6667
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.7266 | Val Accuracy: 0.7087
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.4119 | Val Accuracy: 0.6824

=== Training with seed=4747 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 1.0015 | Val Accuracy: 0.6325
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.7233 | Val Accuracy: 0.6772
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.5004 | Val Accuracy: 0.7165

=== Training with seed=5469 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9666 | Val Accuracy: 0.6089
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6794 | Val Accuracy: 0.7139
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3175 | Val Accuracy: 0.7060

=== Training with seed=2934 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 1.0052 | Val Accuracy: 0.6719
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6808 | Val Accuracy: 0.7769
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3007 | Val Accuracy: 0.7533

=== Training with seed=8183 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9656 | Val Accuracy: 0.6168
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.7004 | Val Accuracy: 0.7297
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3886 | Val Accuracy: 0.7270

=== Training with seed=996 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9364 | Val Accuracy: 0.6509
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6222 | Val Accuracy: 0.7533
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.2946 | Val Accuracy: 0.7218

=== Training with seed=9774 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9457 | Val Accuracy: 0.6220
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6274 | Val Accuracy: 0.7218
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.2848 | Val Accuracy: 0.7375

=== Training with seed=8515 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9519 | Val Accuracy: 0.6457
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6686 | Val Accuracy: 0.7297
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3465 | Val Accuracy: 0.7349

=== Training with seed=8129 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9378 | Val Accuracy: 0.6850
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6130 | Val Accuracy: 0.7664
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3271 | Val Accuracy: 0.7717

=== Training with seed=6258 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9652 | Val Accuracy: 0.5958
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6793 | Val Accuracy: 0.6640
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3591 | Val Accuracy: 0.7402

=== Training with seed=108 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9528 | Val Accuracy: 0.6299
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6535 | Val Accuracy: 0.7480
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3315 | Val Accuracy: 0.7428

=== Training with seed=7881 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9266 | Val Accuracy: 0.5827
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6228 | Val Accuracy: 0.7034
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3111 | Val Accuracy: 0.7953

=== Training with seed=7917 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9983 | Val Accuracy: 0.6325
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6718 | Val Accuracy: 0.7402
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3602 | Val Accuracy: 0.7507

=== Training with seed=3402 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.8821 | Val Accuracy: 0.7139
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.5276 | Val Accuracy: 0.6929
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.2540 | Val Accuracy: 0.7375

=== Training with seed=7467 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9509 | Val Accuracy: 0.6430
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6219 | Val Accuracy: 0.6877
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3497 | Val Accuracy: 0.7375

=== Training with seed=1502 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9879 | Val Accuracy: 0.6037
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6955 | Val Accuracy: 0.7218
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3703 | Val Accuracy: 0.7690

=== Training with seed=4591 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9929 | Val Accuracy: 0.6299
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.7216 | Val Accuracy: 0.7559
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3845 | Val Accuracy: 0.6667

=== Training with seed=1004 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9803 | Val Accuracy: 0.6273
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6606 | Val Accuracy: 0.6719
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3308 | Val Accuracy: 0.7087

=== Training with seed=4549 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9395 | Val Accuracy: 0.6299
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6493 | Val Accuracy: 0.7270
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3703 | Val Accuracy: 0.7192

=== Training with seed=454 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9333 | Val Accuracy: 0.6535
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6744 | Val Accuracy: 0.6772
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.4122 | Val Accuracy: 0.7008

=== Training with seed=465 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9320 | Val Accuracy: 0.6535
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.5677 | Val Accuracy: 0.6877
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.2931 | Val Accuracy: 0.7165

=== Training with seed=1522 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9860 | Val Accuracy: 0.6404
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6803 | Val Accuracy: 0.7060
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3776 | Val Accuracy: 0.7454

=== Training with seed=3305 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9994 | Val Accuracy: 0.6247
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.7344 | Val Accuracy: 0.6667
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.4380 | Val Accuracy: 0.7113

=== Training with seed=3565 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9577 | Val Accuracy: 0.6115
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6778 | Val Accuracy: 0.7533
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3237 | Val Accuracy: 0.7559

=== Training with seed=1686 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9463 | Val Accuracy: 0.6325
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6464 | Val Accuracy: 0.7480
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3295 | Val Accuracy: 0.7323

=== Training with seed=2657 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9782 | Val Accuracy: 0.6299
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6571 | Val Accuracy: 0.7034
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3482 | Val Accuracy: 0.7480

=== Training with seed=4184 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9272 | Val Accuracy: 0.6483
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6256 | Val Accuracy: 0.7270
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.2914 | Val Accuracy: 0.6982

=== Training with seed=3893 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 1.0206 | Val Accuracy: 0.4567
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 1.0680 | Val Accuracy: 0.4567
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 1.0736 | Val Accuracy: 0.4567

=== Training with seed=29 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9755 | Val Accuracy: 0.6325
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6452 | Val Accuracy: 0.7743
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3129 | Val Accuracy: 0.7559

=== Training with seed=4391 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9400 | Val Accuracy: 0.6404
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6099 | Val Accuracy: 0.7507
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3265 | Val Accuracy: 0.7559

=== Training with seed=5820 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9792 | Val Accuracy: 0.6194
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.7119 | Val Accuracy: 0.6798
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3981 | Val Accuracy: 0.6719

=== Training with seed=3259 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9243 | Val Accuracy: 0.6614
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6476 | Val Accuracy: 0.7244
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3373 | Val Accuracy: 0.7638

=== Training with seed=5265 ===


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 1/3 | Train Loss: 0.9239 | Val Accuracy: 0.5223
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 2/3 | Train Loss: 0.6533 | Val Accuracy: 0.7008
[cardiffnlp/twitter-roberta-base-sentiment-latest] Epoch 3/3 | Train Loss: 0.3293 | Val Accuracy: 0.7165

All seed results:
{'seed': 7871, 'accuracy': 0.7375328083989501}
{'seed': 1026, 'accuracy': 0.7191601049868767}
{'seed': 4217, 'accuracy': 0.7611548556430446}
{'seed': 3699, 'accuracy': 0.6929133858267716}
{'seed': 8085, 'accuracy': 0.7165354330708661}
{'seed': 8902, 'accuracy': 0.800524934383202}
{'seed': 9236, 'accuracy': 0.7349081364829396}
{'seed': 61, 'accuracy': 0.7165354330708661}
{'seed': 23, 'accuracy': 0.7454068241469817}
{'seed': 6538, 'accuracy': 0.7322834645669292}
{'seed': 5631, 'accuracy': 0.7165354330708661}
{'seed': 2358, 'accuracy': 0.7611548556430446}
{'seed': 7897, 'accuracy': 0.6876640419947506}
{'seed': 8534, 'accuracy': 0.7664041994750657}
{'seed': 2395