In [1]:
# General set up
import os
# Set these before the Hugging Face libraries below are imported so that they
# are already present in the process environment at import time.
os.environ.update(
    {
        "WANDB_DISABLED": "true",
        "HF_HUB_ENABLE_HF_TRANSFER": "0",
    }
)

import torch
import numpy as np
from dotenv import load_dotenv
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    DataCollatorWithPadding,
    TrainingArguments,
    Trainer,
)
import evaluate

# Load variables from a local .env file (if one exists) before any Hub access.
load_dotenv()

# Pretrained checkpoint name; reused later when the model is instantiated.
checkpoint = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

# The "mrpc" configuration of the "glue" dataset.
raw_datasets = load_dataset(path="glue", name="mrpc")

def tokenize_function(examples):
    """Tokenize the ``sentence1``/``sentence2`` columns of a batch as text pairs.

    Args:
        examples: Mapping that provides ``"sentence1"`` and ``"sentence2"``
            entries (a batch when used with ``map(..., batched=True)``).

    Returns:
        Whatever the module-level ``tokenizer`` returns for the sentence pairs,
        with truncation enabled and no padding requested here.
    """
    first_sentences = examples["sentence1"]
    second_sentences = examples["sentence2"]
    return tokenizer(first_sentences, second_sentences, truncation=True)

# Tokenize every split in batches. No padding is applied here; the collator
# pads each batch when it is assembled.
tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# Sequence-classification model with a 2-label head on top of the checkpoint.
model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)

# Pick the device at runtime: a hard-coded "cuda" raises on machines without a
# CUDA device, whereas this keeps the script runnable on CPU as well.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)

# Metric for the "mrpc" configuration of GLUE; used by compute_metrics.
metric = evaluate.load("glue", "mrpc")

def compute_metrics(eval_preds):
    """Score evaluation predictions with the module-level ``metric``.

    Args:
        eval_preds: A pair ``(logits, labels)`` as unpacked below.

    Returns:
        The result of ``metric.compute`` for the arg-max class of each row of
        logits, compared against the reference labels.
    """
    scores, gold_labels = eval_preds
    # Highest-scoring class along the last axis is the predicted label.
    predicted_labels = np.argmax(scores, axis=-1)
    results = metric.compute(predictions=predicted_labels, references=gold_labels)
    return results

# Mixed precision needs a CUDA device; enabling it unconditionally makes the
# script fail on CPU-only hosts, so gate it on what is actually available.
use_fp16 = torch.cuda.is_available()

training_args = TrainingArguments(
    output_dir="../data/models/mrpc-model",      # Local checkpoint directory
    eval_strategy="epoch",                       # Evaluate once per epoch
    save_strategy="epoch",                       # Must match eval_strategy for load_best_model_at_end
    load_best_model_at_end=True,
    # Without an explicit metric the "best" checkpoint is chosen by validation
    # loss, which can disagree with the task metric. Select on F1 instead so
    # the model that is reloaded (and pushed) is the best-scoring one.
    metric_for_best_model="f1",
    greater_is_better=True,
    num_train_epochs=3,
    per_device_train_batch_size=32,
    fp16=use_fp16,
    report_to="none",                            # No external experiment trackers
    push_to_hub=True,
    hub_model_id="tensor-polinomics/mrpc-bert-test",  # Target Hugging Face Hub repo
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    processing_class=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

trainer.train()

# Push to Hub after training (the best checkpoint has been reloaded by now)
trainer.push_to_hub()

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Downloading builder script: 0.00B [00:00, ?B/s]

Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,0.419753,0.823529,0.879599
2,No log,0.37252,0.838235,0.886598
3,No log,0.467281,0.85049,0.896435


Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            

CommitInfo(commit_url='https://huggingface.co/tensor-polinomics/mrpc-bert-test/commit/345a9e7847623167cd135bb22364e8ae5a62542a', commit_message='End of training', commit_description='', oid='345a9e7847623167cd135bb22364e8ae5a62542a', pr_url=None, repo_url=RepoUrl('https://huggingface.co/tensor-polinomics/mrpc-bert-test', endpoint='https://huggingface.co', repo_type='model', repo_id='tensor-polinomics/mrpc-bert-test'), pr_revision=None, pr_num=None)