In [5]:
import json
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Load the labeled dataset
with open('training_articles.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Parse data: use 'text' as input and 'relevant' as label (convert to int if needed)
df = pd.DataFrame([{'text': item.get('text', ''), 'label': 1 if str(item.get('relevant', '')).lower() == 'true' else 0} for item in data])

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(df['text'], df['label'], test_size=0.2, random_state=42)

# Vectorize the text data
vectorizer = TfidfVectorizer(max_features=5000)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Train a logistic regression classifier
clf = LogisticRegression(max_iter=1000)
clf.fit(X_train_vec, y_train)

# Evaluate the classifier
y_pred = clf.predict(X_test_vec)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.67      1.00      0.80        26
           1       1.00      0.07      0.13        14

    accuracy                           0.68        40
   macro avg       0.83      0.54      0.47        40
weighted avg       0.78      0.68      0.57        40



In [2]:
import json
import pandas as pd
import torch
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score, accuracy_score
from transformers import (
    BertTokenizer,
    BertForSequenceClassification,
    Trainer,
    TrainingArguments,
)
from sklearn.preprocessing import LabelEncoder

# 1. Load & prepare data once
with open("training_articles.json", "r", encoding="utf-8") as f:
    data = json.load(f)
df = pd.DataFrame([
    {
        "text": item.get("text", ""),
        "label": 1 if str(item.get("relevant", "")).lower() == "true" else 0,
    }
    for item in data
])

# 2. Encode labels
le = LabelEncoder()
df["label_enc"] = le.fit_transform(df["label"])

# 3. Tokenizer
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
def tokenize_texts(texts, max_length=256):
    return tokenizer(
        list(texts),
        padding="max_length",
        truncation=True,
        max_length=max_length,
        return_tensors="pt",
    )

# 4. Metrics fn
def compute_metrics(p):
    preds = p.predictions.argmax(axis=-1)
    labels = p.label_ids
    return {
        "accuracy": accuracy_score(labels, preds),
        "f1":       f1_score(labels, preds, average="binary"),
    }

# 5. Prepare cross-validation
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
fold_results = []

for fold, (train_idx, val_idx) in enumerate(skf.split(df["text"], df["label_enc"]), 1):
    print(f"\n### Fold {fold}")

    # Split into texts & labels
    train_texts = df.loc[train_idx, "text"]
    train_labels = df.loc[train_idx, "label_enc"]
    val_texts   = df.loc[val_idx,   "text"]
    val_labels   = df.loc[val_idx,   "label_enc"]

    # Tokenize
    train_enc = tokenize_texts(train_texts)
    val_enc   = tokenize_texts(val_texts)

    # Dataset wrapper
    class NewsDataset(torch.utils.data.Dataset):
        def __init__(self, encodings, labels):
            self.encodings = encodings
            self.labels    = torch.tensor(labels.values if hasattr(labels, "values") else labels)
        def __getitem__(self, idx):
            item = {k: v[idx] for k, v in self.encodings.items()}
            item["labels"] = self.labels[idx]
            return item
        def __len__(self):
            return len(self.labels)

    train_ds = NewsDataset(train_enc, train_labels)
    val_ds   = NewsDataset(val_enc,   val_labels)

    # Load fresh model for each fold
    model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)

    # Training args (you can adjust per your GPU/memory)
    args = TrainingArguments(
        output_dir=f"./results/fold{fold}",
        num_train_epochs=2,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        do_train=True,
        do_eval=True,
        logging_steps=10,
        save_steps=0,
        disable_tqdm=False,
    )

    # Trainer
    trainer = Trainer(
        model=model,
        args=args,
        train_dataset=train_ds,
        eval_dataset=val_ds,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics,
    )

    # Train & eval
    trainer.train()
    metrics = trainer.evaluate()

    # Record F1
    fold_f1 = metrics["eval_f1"]
    fold_results.append(fold_f1)
    print(f"Fold {fold} F1 = {fold_f1:.4f}")

# 6. Summarize
avg_f1 = sum(fold_results) / len(fold_results)
print(f"\nCross-validated F1 scores: {fold_results}")
print(f"Mean F1: {avg_f1:.4f}")


  from .autonotebook import tqdm as notebook_tqdm



### Fold 1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss
10,0.6448
20,0.5273
30,0.3118
40,0.2924


Fold 1 F1 = 0.7500

### Fold 2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss
10,0.6684
20,0.4009
30,0.2205
40,0.2649


Fold 2 F1 = 0.8889

### Fold 3


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss
10,0.7445
20,0.4831
30,0.3683
40,0.2923


Fold 3 F1 = 0.5600

### Fold 4


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss
10,0.6313
20,0.4584
30,0.1984
40,0.3144


Fold 4 F1 = 0.9091

### Fold 5


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss
10,0.6458
20,0.5561
30,0.4296
40,0.3301


Fold 5 F1 = 0.7619

Cross-validated F1 scores: [0.75, 0.8888888888888888, 0.56, 0.9090909090909091, 0.7619047619047619]
Mean F1: 0.7740


In [5]:
# Tokenize entire corpus
full_enc = tokenize_texts(df["text"])
full_ds  = NewsDataset(full_enc, df["label_enc"])

# Load a fresh model
final_model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased", num_labels=2
)

# (Optional) freeze layers if you found that helpful
for i, layer in enumerate(final_model.bert.encoder.layer):
    if i < 8:
        for p in layer.parameters():
            p.requires_grad = False

# TrainingArguments for the “final” run
final_args = TrainingArguments(
    output_dir="./final_model",
    num_train_epochs=3,                # train a bit longer now
    per_device_train_batch_size=8,
    learning_rate=3e-5,                # or whatever your best CV LR was
    weight_decay=0.01,                 # from your inner CV
    logging_steps=50,
    save_steps=500,
)

# Trainer & train
final_trainer = Trainer(
    model=final_model,
    args=final_args,
    train_dataset=full_ds,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,   # so you can still call evaluate()
)
final_trainer.train()

# Save model & tokenizer
final_trainer.save_model("./final_model")
tokenizer.save_pretrained("./final_model")


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  final_trainer = Trainer(


Step,Training Loss
50,0.4448


('./final_model/tokenizer_config.json',
 './final_model/special_tokens_map.json',
 './final_model/vocab.txt',
 './final_model/added_tokens.json')

In [6]:
import torch
import torch.nn.functional as F

# Load the final model back (just to demo inference)
from transformers import BertForSequenceClassification
inference_model = BertForSequenceClassification.from_pretrained("./final_model")
inference_model.eval()

# Pick 5 random articles
sample = df.sample(5, random_state=123).reset_index(drop=True)
texts  = sample["text"].tolist()

# Tokenize & to device
enc = tokenizer(texts, padding="max_length", truncation=True, max_length=256, return_tensors="pt")
with torch.no_grad():
    logits = inference_model(enc["input_ids"], attention_mask=enc["attention_mask"]).logits
probs = F.softmax(logits, dim=-1)

# Map back to label names
inv_map = {i: cls for i, cls in enumerate(le.classes_)}

for i, txt in enumerate(texts):
    pred_id   = torch.argmax(probs[i]).item()
    confidence= probs[i, pred_id].item()
    print(f"--- Example {i+1} ---")
    print(txt[:200].replace("\n"," "), "…")
    print(f"Predicted = {inv_map[pred_id]}  (conf={confidence:.2f})\n")


--- Example 1 ---
Born a few weeks into the Gaza war, infant twins Wesam and Naeem Abu Anza were buried on Sunday, the youngest of 14 members of the same family whom Gaza health authorities say were killed in an Israel …
Predicted = 1  (conf=0.89)

--- Example 2 ---
Volodymyr Zelensky  has hailed the UK’s £2.5 billion military aid package for  Ukraine , as  Rishi Sunak  promised to continue to stand with the country in its fight against Russia. The  Prime Ministe …
Predicted = 0  (conf=0.94)

--- Example 3 ---
Dr. Siegel: I've changed my view of Fetterman and now see his 'personal courage'   Fox News medical contributor Dr. Marc Siegel applauds Democratic Sen. John Fetterman's recent candid interview on his …
Predicted = 0  (conf=0.97)

--- Example 4 ---
Dozens of global traditions could be part of UNESCO's intangible global heritage list this week. These include Italian opera singing and Bangladeshi rickshaw art as well as the Peruvian delicacy of ce …
Predicted = 0  (conf=0.95)

--- 

In [None]:
import json
import json5
import torch
import torch.nn.functional as F
from transformers import BertTokenizer, BertForSequenceClassification

# 1) Paste in your JSON:
with open("articles.json", "r", encoding="utf-8") as f:
    articles = json.load(f)   # <-- json.load on a file, not json.loads on a string

texts    = [a["text"] for a in articles]
titles   = [a["title"] for a in articles]

# 3) Load your fine-tuned model & tokenizer
MODEL_PATH = "./final_model"   # adjust if different
tokenizer  = BertTokenizer.from_pretrained(MODEL_PATH)
model      = BertForSequenceClassification.from_pretrained(MODEL_PATH)
model.eval()

# 4) Tokenize inputs
enc = tokenizer(
    texts,
    padding="max_length",
    truncation=True,
    max_length=256,
    return_tensors="pt",
)

# 5) Run inference
with torch.no_grad():
    logits = model(
        input_ids=enc["input_ids"],
        attention_mask=enc["attention_mask"],
    ).logits

probs       = F.softmax(logits, dim=-1)
pred_ids    = probs.argmax(dim=-1).tolist()
confidences = probs.max(dim=-1).values.tolist()

# 6) Map class IDs back to names
label_map = {0: "irrelevant", 1: "relevant"}

# 7) Print results
for i, title in enumerate(titles):
    print(f"--- Article {i+1} ---")
    print("Title: ", title)
    print(f"Predicted: {label_map[pred_ids[i]]}  (confidence: {confidences[i]:.2f})\n")


# High RECALL, precision not == 100% but close.

--- Article 1 ---
Title:  ‘Kamala Harris, Joe Biden ignored Hindus’: Donald Trump condemns violence against minorities
Predicted: irrelevant  (confidence: 0.97)

--- Article 2 ---
Title:  Israeli firefighting teams battle bushfires near Jerusalem for second day
Predicted: relevant  (confidence: 0.80)

--- Article 3 ---
Title:  NYC broker fee law — which protects tenants from costs — challenged by real estate agents in 11th-hour push
Predicted: irrelevant  (confidence: 0.95)

--- Article 4 ---
Title:  US proposes 60-day ceasefire for Gaza; hostage-prisoner swap, plan shows
Predicted: relevant  (confidence: 0.87)

--- Article 5 ---
Title:  Mother of Palestinian American boy slain in suburban Chicago hate crime testifies at trial
Predicted: irrelevant  (confidence: 0.83)

--- Article 6 ---
Title:  5 things to know for Feb. 26: Air travel safety, Budget blueprint, Ukraine’s minerals, Immigration, Gaza ceasefire
Predicted: irrelevant  (confidence: 0.97)

--- Article 7 ---
Title:  5 things t

In [None]:
import json
import pandas as pd
import torch
from sklearn.model_selection import StratifiedKFold, ParameterGrid
from sklearn.metrics import accuracy_score, f1_score
from transformers import (
    BertTokenizer,
    BertForSequenceClassification,
    Trainer,
    TrainingArguments,
)
from sklearn.preprocessing import LabelEncoder

# 1. Load & prepare data
with open("training_articles.json", "r", encoding="utf-8") as f:
    raw = json.load(f)
df = pd.DataFrame([
    {
        "text": item.get("text", ""),
        "label": 1 if str(item.get("relevant", "")).lower() == "true" else 0,
    }
    for item in raw
])

# 2. Encode labels
le = LabelEncoder()
df["label_enc"] = le.fit_transform(df["label"])

# 3. Tokenizer helper
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
def tokenize_texts(texts, max_length=256):
    return tokenizer(
        list(texts),
        padding="max_length",
        truncation=True,
        max_length=max_length,
        return_tensors="pt",
    )

# 4. Dataset wrapper
class NewsDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels    = torch.tensor(labels.values if hasattr(labels, "values") else labels)
    def __getitem__(self, idx):
        item = {k: v[idx] for k, v in self.encodings.items()}
        item["labels"] = self.labels[idx]
        return item
    def __len__(self):
        return len(self.labels)

# 5. Metrics function
def compute_metrics(p):
    preds = p.predictions.argmax(axis=-1)
    return {
        "accuracy": accuracy_score(p.label_ids, preds),
        "f1":       f1_score(p.label_ids, preds, average="binary"),
    }

# 6. Hyperparameter grid for inner CV
param_grid = [
    {"learning_rate": 2e-5, "weight_decay": 0.0},
    {"learning_rate": 3e-5, "weight_decay": 0.01},
]

outer_skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
outer_scores = []

for fold, (train_idx, test_idx) in enumerate(
    outer_skf.split(df["text"], df["label_enc"]), start=1
):
    print(f"\n=== Outer Fold {fold} ===")
    X_train, y_train = df["text"].iloc[train_idx], df["label_enc"].iloc[train_idx]
    X_test,  y_test  = df["text"].iloc[test_idx],  df["label_enc"].iloc[test_idx]

    # ---- Inner CV to select hyperparameters ----
    best_inner_f1 = -1.0
    best_params   = param_grid[0]  # fallback
    inner_skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=fold)

    for params in param_grid:
        inner_f1s = []
        for inner_tr, inner_val in inner_skf.split(X_train, y_train):
            # Tokenize
            enc_tr = tokenize_texts(X_train.iloc[inner_tr])
            enc_val= tokenize_texts(X_train.iloc[inner_val])
            ds_tr  = NewsDataset(enc_tr, y_train.iloc[inner_tr])
            ds_val = NewsDataset(enc_val, y_train.iloc[inner_val])

            # Load model & freeze first 8 layers
            model = BertForSequenceClassification.from_pretrained(
                "bert-base-uncased", num_labels=2
            )
            for i, layer in enumerate(model.bert.encoder.layer):
                if i < 8:
                    for p in layer.parameters():
                        p.requires_grad = False

            # Legacy TrainingArguments (no evaluation_strategy)
            args = TrainingArguments(
                output_dir=f"./tmp/inner_fold{fold}",
                num_train_epochs=2,
                per_device_train_batch_size=8,
                per_device_eval_batch_size=8,
                learning_rate=params["learning_rate"],
                weight_decay=params["weight_decay"],
                do_train=True,
                do_eval=True,
                logging_steps=10,
                save_steps=0,
                disable_tqdm=True,
            )
            trainer = Trainer(
                model=model,
                args=args,
                train_dataset=ds_tr,
                eval_dataset=ds_val,
                compute_metrics=compute_metrics,
            )
            trainer.train()
            metrics = trainer.evaluate()
            inner_f1s.append(metrics["eval_f1"])

        avg_f1 = sum(inner_f1s) / len(inner_f1s)
        if avg_f1 > best_inner_f1:
            best_inner_f1, best_params = avg_f1, params

    print(f"Best inner F1 = {best_inner_f1:.4f} with {best_params}")

    # ---- Final training on full train, evaluation on outer test ----
    enc_tr_full = tokenize_texts(X_train)
    enc_te_full = tokenize_texts(X_test)
    ds_tr_full  = NewsDataset(enc_tr_full, y_train)
    ds_te_full  = NewsDataset(enc_te_full,  y_test)

    model = BertForSequenceClassification.from_pretrained(
        "bert-base-uncased", num_labels=2
    )
    for i, layer in enumerate(model.bert.encoder.layer):
        if i < 8:
            for p in layer.parameters():
                p.requires_grad = False

    args = TrainingArguments(
        output_dir=f"./results/fold{fold}",
        num_train_epochs=2,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        learning_rate=best_params["learning_rate"],
        weight_decay= best_params["weight_decay"],
        do_train=True,
        do_eval=True,
        logging_steps=10,
        save_steps=0,
        disable_tqdm=True,
    )
    trainer = Trainer(
        model=model,
        args=args,
        train_dataset=ds_tr_full,
        eval_dataset=ds_te_full,
        compute_metrics=compute_metrics,
    )
    trainer.train()
    final_metrics = trainer.evaluate()
    outer_scores.append(final_metrics["eval_f1"])
    print(f"Outer Fold {fold} F1 = {final_metrics['eval_f1']:.4f}")

# Summarize nested CV results
print("\nNested CV F1 per fold:", outer_scores)
print("Mean F1:", sum(outer_scores) / len(outer_scores))



=== Outer Fold 1 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'loss': 0.6463, 'grad_norm': 8.24787712097168, 'learning_rate': 1.3571428571428574e-05, 'epoch': 0.7142857142857143}
{'loss': 0.5087, 'grad_norm': 3.069215774536133, 'learning_rate': 6.4285714285714295e-06, 'epoch': 1.4285714285714286}
{'train_runtime': 6.0999, 'train_samples_per_second': 34.754, 'train_steps_per_second': 4.59, 'train_loss': 0.5816445010049003, 'epoch': 2.0}
{'eval_loss': 0.5504566431045532, 'eval_accuracy': 0.7037037037037037, 'eval_f1': 0.0, 'eval_runtime': 0.2536, 'eval_samples_per_second': 212.959, 'eval_steps_per_second': 27.606, 'epoch': 2.0}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'loss': 0.64, 'grad_norm': 2.9499199390411377, 'learning_rate': 1.3571428571428574e-05, 'epoch': 0.7142857142857143}
{'loss': 0.5572, 'grad_norm': 9.155176162719727, 'learning_rate': 6.4285714285714295e-06, 'epoch': 1.4285714285714286}
{'train_runtime': 6.5471, 'train_samples_per_second': 32.686, 'train_steps_per_second': 4.277, 'train_loss': 0.5644358992576599, 'epoch': 2.0}
{'eval_loss': 0.5360226631164551, 'eval_accuracy': 0.6981132075471698, 'eval_f1': 0.0, 'eval_runtime': 0.2502, 'eval_samples_per_second': 211.79, 'eval_steps_per_second': 27.972, 'epoch': 2.0}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'loss': 0.626, 'grad_norm': 9.719393730163574, 'learning_rate': 1.3571428571428574e-05, 'epoch': 0.7142857142857143}
{'loss': 0.6005, 'grad_norm': 9.842891693115234, 'learning_rate': 6.4285714285714295e-06, 'epoch': 1.4285714285714286}
{'train_runtime': 5.4518, 'train_samples_per_second': 39.253, 'train_steps_per_second': 5.136, 'train_loss': 0.608568583215986, 'epoch': 2.0}
{'eval_loss': 0.5433419942855835, 'eval_accuracy': 0.6981132075471698, 'eval_f1': 0.0, 'eval_runtime': 0.2539, 'eval_samples_per_second': 208.746, 'eval_steps_per_second': 27.57, 'epoch': 2.0}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'loss': 0.6638, 'grad_norm': 11.24498462677002, 'learning_rate': 2.0357142857142858e-05, 'epoch': 0.7142857142857143}
{'loss': 0.4408, 'grad_norm': 4.921231746673584, 'learning_rate': 9.642857142857144e-06, 'epoch': 1.4285714285714286}
{'train_runtime': 5.8273, 'train_samples_per_second': 36.38, 'train_steps_per_second': 4.805, 'train_loss': 0.5519413096564156, 'epoch': 2.0}
{'eval_loss': 0.4736465811729431, 'eval_accuracy': 0.7037037037037037, 'eval_f1': 0.0, 'eval_runtime': 0.2604, 'eval_samples_per_second': 207.348, 'eval_steps_per_second': 26.878, 'epoch': 2.0}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'loss': 0.6327, 'grad_norm': 2.903373956680298, 'learning_rate': 2.0357142857142858e-05, 'epoch': 0.7142857142857143}
{'loss': 0.5231, 'grad_norm': 9.467113494873047, 'learning_rate': 9.642857142857144e-06, 'epoch': 1.4285714285714286}
{'train_runtime': 5.8973, 'train_samples_per_second': 36.288, 'train_steps_per_second': 4.748, 'train_loss': 0.5357553788593837, 'epoch': 2.0}
{'eval_loss': 0.47636184096336365, 'eval_accuracy': 0.6981132075471698, 'eval_f1': 0.0, 'eval_runtime': 0.2592, 'eval_samples_per_second': 204.444, 'eval_steps_per_second': 27.002, 'epoch': 2.0}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'loss': 0.6065, 'grad_norm': 10.479204177856445, 'learning_rate': 2.0357142857142858e-05, 'epoch': 0.7142857142857143}
{'loss': 0.57, 'grad_norm': 9.225403785705566, 'learning_rate': 9.642857142857144e-06, 'epoch': 1.4285714285714286}
{'train_runtime': 7.2315, 'train_samples_per_second': 29.593, 'train_steps_per_second': 3.872, 'train_loss': 0.5743180002485003, 'epoch': 2.0}
{'eval_loss': 0.5016201138496399, 'eval_accuracy': 0.6981132075471698, 'eval_f1': 0.0, 'eval_runtime': 0.2519, 'eval_samples_per_second': 210.369, 'eval_steps_per_second': 27.785, 'epoch': 2.0}
Best inner F1 = 0.0000 with {'learning_rate': 2e-05, 'weight_decay': 0.0}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'loss': 0.659, 'grad_norm': 5.981875896453857, 'learning_rate': 1.55e-05, 'epoch': 0.5}
{'loss': 0.5386, 'grad_norm': 4.960479736328125, 'learning_rate': 1.0500000000000001e-05, 'epoch': 1.0}
{'loss': 0.4724, 'grad_norm': 3.3569984436035156, 'learning_rate': 5.500000000000001e-06, 'epoch': 1.5}
{'loss': 0.5384, 'grad_norm': 10.607613563537598, 'learning_rate': 5.000000000000001e-07, 'epoch': 2.0}
{'train_runtime': 8.7169, 'train_samples_per_second': 36.71, 'train_steps_per_second': 4.589, 'train_loss': 0.5520978927612304, 'epoch': 2.0}
{'eval_loss': 0.47544392943382263, 'eval_accuracy': 0.7, 'eval_f1': 0.0, 'eval_runtime': 0.1914, 'eval_samples_per_second': 208.983, 'eval_steps_per_second': 26.123, 'epoch': 2.0}
Outer Fold 1 F1 = 0.0000

=== Outer Fold 2 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'loss': 0.6564, 'grad_norm': 3.6161696910858154, 'learning_rate': 1.3571428571428574e-05, 'epoch': 0.7142857142857143}
{'loss': 0.5293, 'grad_norm': 8.3313570022583, 'learning_rate': 6.4285714285714295e-06, 'epoch': 1.4285714285714286}
{'train_runtime': 4.8453, 'train_samples_per_second': 43.754, 'train_steps_per_second': 5.779, 'train_loss': 0.5697400399616787, 'epoch': 2.0}
{'eval_loss': 0.49598097801208496, 'eval_accuracy': 0.7037037037037037, 'eval_f1': 0.0, 'eval_runtime': 0.2541, 'eval_samples_per_second': 212.488, 'eval_steps_per_second': 27.545, 'epoch': 2.0}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'loss': 0.6381, 'grad_norm': 5.579145908355713, 'learning_rate': 1.3571428571428574e-05, 'epoch': 0.7142857142857143}
{'loss': 0.6484, 'grad_norm': 3.1049325466156006, 'learning_rate': 6.4285714285714295e-06, 'epoch': 1.4285714285714286}
{'train_runtime': 7.9992, 'train_samples_per_second': 26.753, 'train_steps_per_second': 3.5, 'train_loss': 0.597965555531638, 'epoch': 2.0}
{'eval_loss': 0.5445151925086975, 'eval_accuracy': 0.6981132075471698, 'eval_f1': 0.0, 'eval_runtime': 0.2543, 'eval_samples_per_second': 208.444, 'eval_steps_per_second': 27.53, 'epoch': 2.0}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'loss': 0.6549, 'grad_norm': 6.612698554992676, 'learning_rate': 1.3571428571428574e-05, 'epoch': 0.7142857142857143}
{'loss': 0.5735, 'grad_norm': 3.175189256668091, 'learning_rate': 6.4285714285714295e-06, 'epoch': 1.4285714285714286}
{'train_runtime': 8.0195, 'train_samples_per_second': 26.685, 'train_steps_per_second': 3.491, 'train_loss': 0.584602849824088, 'epoch': 2.0}
{'eval_loss': 0.5416379570960999, 'eval_accuracy': 0.6981132075471698, 'eval_f1': 0.0, 'eval_runtime': 0.2574, 'eval_samples_per_second': 205.902, 'eval_steps_per_second': 27.195, 'epoch': 2.0}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'loss': 0.638, 'grad_norm': 4.412699222564697, 'learning_rate': 2.0357142857142858e-05, 'epoch': 0.7142857142857143}
{'loss': 0.4913, 'grad_norm': 8.525997161865234, 'learning_rate': 9.642857142857144e-06, 'epoch': 1.4285714285714286}
{'train_runtime': 8.1694, 'train_samples_per_second': 25.951, 'train_steps_per_second': 3.427, 'train_loss': 0.5249090365001133, 'epoch': 2.0}
{'eval_loss': 0.41459861397743225, 'eval_accuracy': 0.7592592592592593, 'eval_f1': 0.3157894736842105, 'eval_runtime': 0.2587, 'eval_samples_per_second': 208.715, 'eval_steps_per_second': 27.056, 'epoch': 2.0}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'loss': 0.6165, 'grad_norm': 4.027401924133301, 'learning_rate': 2.0357142857142858e-05, 'epoch': 0.7142857142857143}
{'loss': 0.6501, 'grad_norm': 3.460874557495117, 'learning_rate': 9.642857142857144e-06, 'epoch': 1.4285714285714286}
{'train_runtime': 8.0075, 'train_samples_per_second': 26.725, 'train_steps_per_second': 3.497, 'train_loss': 0.5772701672145298, 'epoch': 2.0}
{'eval_loss': 0.5035780072212219, 'eval_accuracy': 0.6981132075471698, 'eval_f1': 0.0, 'eval_runtime': 0.2561, 'eval_samples_per_second': 206.957, 'eval_steps_per_second': 27.334, 'epoch': 2.0}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'loss': 0.6407, 'grad_norm': 7.96733283996582, 'learning_rate': 2.0357142857142858e-05, 'epoch': 0.7142857142857143}
{'loss': 0.5445, 'grad_norm': 3.611819267272949, 'learning_rate': 9.642857142857144e-06, 'epoch': 1.4285714285714286}
{'train_runtime': 6.346, 'train_samples_per_second': 33.722, 'train_steps_per_second': 4.412, 'train_loss': 0.5542388728686741, 'epoch': 2.0}
{'eval_loss': 0.4935925304889679, 'eval_accuracy': 0.6981132075471698, 'eval_f1': 0.0, 'eval_runtime': 0.2565, 'eval_samples_per_second': 206.629, 'eval_steps_per_second': 27.291, 'epoch': 2.0}
Best inner F1 = 0.1053 with {'learning_rate': 3e-05, 'weight_decay': 0.01}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'loss': 0.6623, 'grad_norm': 7.379771709442139, 'learning_rate': 2.3250000000000003e-05, 'epoch': 0.5}
{'loss': 0.4552, 'grad_norm': 4.989931106567383, 'learning_rate': 1.575e-05, 'epoch': 1.0}
{'loss': 0.375, 'grad_norm': 4.885223388671875, 'learning_rate': 8.25e-06, 'epoch': 1.5}
{'loss': 0.3836, 'grad_norm': 4.521304607391357, 'learning_rate': 7.5e-07, 'epoch': 2.0}
{'train_runtime': 8.1795, 'train_samples_per_second': 39.122, 'train_steps_per_second': 4.89, 'train_loss': 0.4690282344818115, 'epoch': 2.0}
{'eval_loss': 0.3691452145576477, 'eval_accuracy': 0.875, 'eval_f1': 0.782608695652174, 'eval_runtime': 0.1923, 'eval_samples_per_second': 207.97, 'eval_steps_per_second': 25.996, 'epoch': 2.0}
Outer Fold 2 F1 = 0.7826

=== Outer Fold 3 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'loss': 0.6543, 'grad_norm': 5.2951507568359375, 'learning_rate': 1.3571428571428574e-05, 'epoch': 0.7142857142857143}
{'loss': 0.5015, 'grad_norm': 4.484918594360352, 'learning_rate': 6.4285714285714295e-06, 'epoch': 1.4285714285714286}
{'train_runtime': 7.7768, 'train_samples_per_second': 27.26, 'train_steps_per_second': 3.6, 'train_loss': 0.5172703521592277, 'epoch': 2.0}
{'eval_loss': 0.5110672116279602, 'eval_accuracy': 0.7407407407407407, 'eval_f1': 0.4166666666666667, 'eval_runtime': 0.2654, 'eval_samples_per_second': 203.435, 'eval_steps_per_second': 26.371, 'epoch': 2.0}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'loss': 0.6667, 'grad_norm': 10.417009353637695, 'learning_rate': 1.3571428571428574e-05, 'epoch': 0.7142857142857143}
{'loss': 0.5071, 'grad_norm': 3.842071771621704, 'learning_rate': 6.4285714285714295e-06, 'epoch': 1.4285714285714286}
{'train_runtime': 6.4576, 'train_samples_per_second': 33.139, 'train_steps_per_second': 4.336, 'train_loss': 0.5595980371747699, 'epoch': 2.0}
{'eval_loss': 0.49762895703315735, 'eval_accuracy': 0.7547169811320755, 'eval_f1': 0.3157894736842105, 'eval_runtime': 0.2617, 'eval_samples_per_second': 202.531, 'eval_steps_per_second': 26.749, 'epoch': 2.0}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'loss': 0.6854, 'grad_norm': 2.89085054397583, 'learning_rate': 1.3571428571428574e-05, 'epoch': 0.7142857142857143}
{'loss': 0.5174, 'grad_norm': 2.7104721069335938, 'learning_rate': 6.4285714285714295e-06, 'epoch': 1.4285714285714286}
{'train_runtime': 7.5853, 'train_samples_per_second': 28.213, 'train_steps_per_second': 3.691, 'train_loss': 0.5893386432102748, 'epoch': 2.0}
{'eval_loss': 0.5274100303649902, 'eval_accuracy': 0.6981132075471698, 'eval_f1': 0.0, 'eval_runtime': 0.2581, 'eval_samples_per_second': 205.347, 'eval_steps_per_second': 27.121, 'epoch': 2.0}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'loss': 0.6185, 'grad_norm': 4.48584508895874, 'learning_rate': 2.0357142857142858e-05, 'epoch': 0.7142857142857143}
{'loss': 0.4431, 'grad_norm': 9.795675277709961, 'learning_rate': 9.642857142857144e-06, 'epoch': 1.4285714285714286}
{'train_runtime': 7.7461, 'train_samples_per_second': 27.369, 'train_steps_per_second': 3.615, 'train_loss': 0.46119712931769236, 'epoch': 2.0}
{'eval_loss': 0.45015376806259155, 'eval_accuracy': 0.7592592592592593, 'eval_f1': 0.5517241379310345, 'eval_runtime': 0.2633, 'eval_samples_per_second': 205.108, 'eval_steps_per_second': 26.588, 'epoch': 2.0}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'loss': 0.6468, 'grad_norm': 7.26979923248291, 'learning_rate': 2.0357142857142858e-05, 'epoch': 0.7142857142857143}
{'loss': 0.4456, 'grad_norm': 11.053085327148438, 'learning_rate': 9.642857142857144e-06, 'epoch': 1.4285714285714286}
{'train_runtime': 7.9523, 'train_samples_per_second': 26.911, 'train_steps_per_second': 3.521, 'train_loss': 0.4873677151543753, 'epoch': 2.0}
{'eval_loss': 0.3919275104999542, 'eval_accuracy': 0.8301886792452831, 'eval_f1': 0.6896551724137931, 'eval_runtime': 0.261, 'eval_samples_per_second': 203.043, 'eval_steps_per_second': 26.817, 'epoch': 2.0}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'loss': 0.671, 'grad_norm': 3.679316997528076, 'learning_rate': 2.0357142857142858e-05, 'epoch': 0.7142857142857143}
{'loss': 0.4875, 'grad_norm': 3.104135751724243, 'learning_rate': 9.642857142857144e-06, 'epoch': 1.4285714285714286}
{'train_runtime': 7.9813, 'train_samples_per_second': 26.813, 'train_steps_per_second': 3.508, 'train_loss': 0.5627611875534058, 'epoch': 2.0}
{'eval_loss': 0.47860270738601685, 'eval_accuracy': 0.6981132075471698, 'eval_f1': 0.0, 'eval_runtime': 0.2638, 'eval_samples_per_second': 200.877, 'eval_steps_per_second': 26.531, 'epoch': 2.0}
Best inner F1 = 0.4138 with {'learning_rate': 3e-05, 'weight_decay': 0.01}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'loss': 0.7156, 'grad_norm': 12.387845039367676, 'learning_rate': 2.3250000000000003e-05, 'epoch': 0.5}
{'loss': 0.4453, 'grad_norm': 8.460331916809082, 'learning_rate': 1.575e-05, 'epoch': 1.0}
{'loss': 0.3226, 'grad_norm': 7.382590293884277, 'learning_rate': 8.25e-06, 'epoch': 1.5}
{'loss': 0.229, 'grad_norm': 13.870139122009277, 'learning_rate': 7.5e-07, 'epoch': 2.0}
{'train_runtime': 9.4363, 'train_samples_per_second': 33.911, 'train_steps_per_second': 4.239, 'train_loss': 0.4281137466430664, 'epoch': 2.0}
{'eval_loss': 0.5037909746170044, 'eval_accuracy': 0.7, 'eval_f1': 0.5, 'eval_runtime': 0.1968, 'eval_samples_per_second': 203.256, 'eval_steps_per_second': 25.407, 'epoch': 2.0}
Outer Fold 3 F1 = 0.5000

=== Outer Fold 4 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'loss': 0.6634, 'grad_norm': 4.5602803230285645, 'learning_rate': 1.3571428571428574e-05, 'epoch': 0.7142857142857143}
{'loss': 0.5838, 'grad_norm': 5.724076271057129, 'learning_rate': 6.4285714285714295e-06, 'epoch': 1.4285714285714286}
{'train_runtime': 8.1167, 'train_samples_per_second': 26.119, 'train_steps_per_second': 3.45, 'train_loss': 0.5877760819026402, 'epoch': 2.0}
{'eval_loss': 0.5209246873855591, 'eval_accuracy': 0.7037037037037037, 'eval_f1': 0.0, 'eval_runtime': 0.2642, 'eval_samples_per_second': 204.357, 'eval_steps_per_second': 26.491, 'epoch': 2.0}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'loss': 0.6637, 'grad_norm': 4.2112717628479, 'learning_rate': 1.3571428571428574e-05, 'epoch': 0.7142857142857143}
{'loss': 0.5869, 'grad_norm': 6.754333972930908, 'learning_rate': 6.4285714285714295e-06, 'epoch': 1.4285714285714286}
{'train_runtime': 7.0657, 'train_samples_per_second': 30.287, 'train_steps_per_second': 3.963, 'train_loss': 0.5691882457051959, 'epoch': 2.0}
{'eval_loss': 0.5358714461326599, 'eval_accuracy': 0.6981132075471698, 'eval_f1': 0.0, 'eval_runtime': 0.2527, 'eval_samples_per_second': 209.751, 'eval_steps_per_second': 27.703, 'epoch': 2.0}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'loss': 0.6632, 'grad_norm': 3.534224510192871, 'learning_rate': 1.3571428571428574e-05, 'epoch': 0.7142857142857143}
{'loss': 0.6175, 'grad_norm': 2.8870081901550293, 'learning_rate': 6.4285714285714295e-06, 'epoch': 1.4285714285714286}
{'train_runtime': 6.467, 'train_samples_per_second': 33.091, 'train_steps_per_second': 4.33, 'train_loss': 0.6244322402136666, 'epoch': 2.0}
{'eval_loss': 0.5436270833015442, 'eval_accuracy': 0.6981132075471698, 'eval_f1': 0.0, 'eval_runtime': 0.2608, 'eval_samples_per_second': 203.194, 'eval_steps_per_second': 26.837, 'epoch': 2.0}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'loss': 0.6501, 'grad_norm': 4.405368328094482, 'learning_rate': 2.0357142857142858e-05, 'epoch': 0.7142857142857143}
{'loss': 0.5439, 'grad_norm': 5.033812999725342, 'learning_rate': 9.642857142857144e-06, 'epoch': 1.4285714285714286}
{'train_runtime': 5.8986, 'train_samples_per_second': 35.941, 'train_steps_per_second': 4.747, 'train_loss': 0.5454211575644357, 'epoch': 2.0}
{'eval_loss': 0.4667733609676361, 'eval_accuracy': 0.7407407407407407, 'eval_f1': 0.2222222222222222, 'eval_runtime': 0.2588, 'eval_samples_per_second': 208.661, 'eval_steps_per_second': 27.049, 'epoch': 2.0}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'loss': 0.6529, 'grad_norm': 4.283801078796387, 'learning_rate': 2.0357142857142858e-05, 'epoch': 0.7142857142857143}
{'loss': 0.5417, 'grad_norm': 7.669592380523682, 'learning_rate': 9.642857142857144e-06, 'epoch': 1.4285714285714286}
{'train_runtime': 6.5228, 'train_samples_per_second': 32.808, 'train_steps_per_second': 4.293, 'train_loss': 0.531026576246534, 'epoch': 2.0}
{'eval_loss': 0.4680681526660919, 'eval_accuracy': 0.7169811320754716, 'eval_f1': 0.11764705882352941, 'eval_runtime': 0.2619, 'eval_samples_per_second': 202.335, 'eval_steps_per_second': 26.724, 'epoch': 2.0}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'loss': 0.6466, 'grad_norm': 2.964806318283081, 'learning_rate': 2.0357142857142858e-05, 'epoch': 0.7142857142857143}
{'loss': 0.6034, 'grad_norm': 3.215555429458618, 'learning_rate': 9.642857142857144e-06, 'epoch': 1.4285714285714286}
{'train_runtime': 6.5141, 'train_samples_per_second': 32.852, 'train_steps_per_second': 4.298, 'train_loss': 0.5988059384482247, 'epoch': 2.0}
{'eval_loss': 0.5048835277557373, 'eval_accuracy': 0.6981132075471698, 'eval_f1': 0.0, 'eval_runtime': 0.2556, 'eval_samples_per_second': 207.363, 'eval_steps_per_second': 27.388, 'epoch': 2.0}
Best inner F1 = 0.1133 with {'learning_rate': 3e-05, 'weight_decay': 0.01}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'loss': 0.6825, 'grad_norm': 10.350278854370117, 'learning_rate': 2.3250000000000003e-05, 'epoch': 0.5}
{'loss': 0.5227, 'grad_norm': 8.166686058044434, 'learning_rate': 1.575e-05, 'epoch': 1.0}
{'loss': 0.3841, 'grad_norm': 5.858389377593994, 'learning_rate': 8.25e-06, 'epoch': 1.5}
{'loss': 0.3683, 'grad_norm': 4.441047668457031, 'learning_rate': 7.5e-07, 'epoch': 2.0}
{'train_runtime': 7.8232, 'train_samples_per_second': 40.904, 'train_steps_per_second': 5.113, 'train_loss': 0.48939624428749084, 'epoch': 2.0}
{'eval_loss': 0.3053387403488159, 'eval_accuracy': 0.85, 'eval_f1': 0.6666666666666666, 'eval_runtime': 0.1964, 'eval_samples_per_second': 203.681, 'eval_steps_per_second': 25.46, 'epoch': 2.0}
Outer Fold 4 F1 = 0.6667

=== Outer Fold 5 ===


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'loss': 0.644, 'grad_norm': 5.013405799865723, 'learning_rate': 1.3571428571428574e-05, 'epoch': 0.7142857142857143}
{'loss': 0.4858, 'grad_norm': 9.479620933532715, 'learning_rate': 6.4285714285714295e-06, 'epoch': 1.4285714285714286}
{'train_runtime': 5.7696, 'train_samples_per_second': 36.745, 'train_steps_per_second': 4.853, 'train_loss': 0.5699025392532349, 'epoch': 2.0}
{'eval_loss': 0.5228021144866943, 'eval_accuracy': 0.7037037037037037, 'eval_f1': 0.1111111111111111, 'eval_runtime': 0.2631, 'eval_samples_per_second': 205.214, 'eval_steps_per_second': 26.602, 'epoch': 2.0}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'loss': 0.6696, 'grad_norm': 3.5225231647491455, 'learning_rate': 1.3571428571428574e-05, 'epoch': 0.7142857142857143}
{'loss': 0.4608, 'grad_norm': 4.820092678070068, 'learning_rate': 6.4285714285714295e-06, 'epoch': 1.4285714285714286}
{'train_runtime': 5.2559, 'train_samples_per_second': 40.716, 'train_steps_per_second': 5.327, 'train_loss': 0.5358146173613412, 'epoch': 2.0}
{'eval_loss': 0.5210983157157898, 'eval_accuracy': 0.6981132075471698, 'eval_f1': 0.0, 'eval_runtime': 0.2611, 'eval_samples_per_second': 202.963, 'eval_steps_per_second': 26.806, 'epoch': 2.0}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'loss': 0.6204, 'grad_norm': 3.082446813583374, 'learning_rate': 1.3571428571428574e-05, 'epoch': 0.7142857142857143}
{'loss': 0.491, 'grad_norm': 3.537034749984741, 'learning_rate': 6.4285714285714295e-06, 'epoch': 1.4285714285714286}
{'train_runtime': 5.6967, 'train_samples_per_second': 37.565, 'train_steps_per_second': 4.915, 'train_loss': 0.5546565907342094, 'epoch': 2.0}
{'eval_loss': 0.4838898479938507, 'eval_accuracy': 0.6981132075471698, 'eval_f1': 0.0, 'eval_runtime': 0.2616, 'eval_samples_per_second': 202.561, 'eval_steps_per_second': 26.753, 'epoch': 2.0}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'loss': 0.6397, 'grad_norm': 4.963001728057861, 'learning_rate': 2.0357142857142858e-05, 'epoch': 0.7142857142857143}
{'loss': 0.4574, 'grad_norm': 9.165356636047363, 'learning_rate': 9.642857142857144e-06, 'epoch': 1.4285714285714286}
{'train_runtime': 5.0948, 'train_samples_per_second': 41.611, 'train_steps_per_second': 5.496, 'train_loss': 0.5377746820449829, 'epoch': 2.0}
{'eval_loss': 0.4785335063934326, 'eval_accuracy': 0.7222222222222222, 'eval_f1': 0.21052631578947367, 'eval_runtime': 0.2601, 'eval_samples_per_second': 207.629, 'eval_steps_per_second': 26.915, 'epoch': 2.0}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'loss': 0.6515, 'grad_norm': 3.749399185180664, 'learning_rate': 2.0357142857142858e-05, 'epoch': 0.7142857142857143}
{'loss': 0.4168, 'grad_norm': 4.520461082458496, 'learning_rate': 9.642857142857144e-06, 'epoch': 1.4285714285714286}
{'train_runtime': 7.0921, 'train_samples_per_second': 30.174, 'train_steps_per_second': 3.948, 'train_loss': 0.4892325145857675, 'epoch': 2.0}
{'eval_loss': 0.47362592816352844, 'eval_accuracy': 0.7547169811320755, 'eval_f1': 0.3157894736842105, 'eval_runtime': 0.257, 'eval_samples_per_second': 206.233, 'eval_steps_per_second': 27.238, 'epoch': 2.0}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'loss': 0.602, 'grad_norm': 2.950532913208008, 'learning_rate': 2.0357142857142858e-05, 'epoch': 0.7142857142857143}
{'loss': 0.4514, 'grad_norm': 3.5660126209259033, 'learning_rate': 9.642857142857144e-06, 'epoch': 1.4285714285714286}
{'train_runtime': 7.8665, 'train_samples_per_second': 27.204, 'train_steps_per_second': 3.559, 'train_loss': 0.5116022654942104, 'epoch': 2.0}
{'eval_loss': 0.40703240036964417, 'eval_accuracy': 0.7924528301886793, 'eval_f1': 0.47619047619047616, 'eval_runtime': 0.2605, 'eval_samples_per_second': 203.459, 'eval_steps_per_second': 26.872, 'epoch': 2.0}
Best inner F1 = 0.3342 with {'learning_rate': 3e-05, 'weight_decay': 0.01}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'loss': 0.6879, 'grad_norm': 11.426597595214844, 'learning_rate': 2.3250000000000003e-05, 'epoch': 0.5}
{'loss': 0.4246, 'grad_norm': 7.648776531219482, 'learning_rate': 1.575e-05, 'epoch': 1.0}
{'loss': 0.2924, 'grad_norm': 7.088068008422852, 'learning_rate': 8.25e-06, 'epoch': 1.5}
{'loss': 0.3632, 'grad_norm': 5.967757225036621, 'learning_rate': 7.5e-07, 'epoch': 2.0}
{'train_runtime': 9.9736, 'train_samples_per_second': 32.085, 'train_steps_per_second': 4.011, 'train_loss': 0.44202861189842224, 'epoch': 2.0}
{'eval_loss': 0.452415406703949, 'eval_accuracy': 0.775, 'eval_f1': 0.5714285714285714, 'eval_runtime': 0.1986, 'eval_samples_per_second': 201.391, 'eval_steps_per_second': 25.174, 'epoch': 2.0}
Outer Fold 5 F1 = 0.5714

Nested CV F1 per fold: [0.0, 0.782608695652174, 0.5, 0.6666666666666666, 0.5714285714285714]
Mean F1: 0.5041407867494824
