# Fine-tune BERT on a Mock Sentiment Dataset

In [None]:
!pip install transformers datasets accelerate -q

## Load dataset

In [None]:
from datasets import DatasetDict, load_dataset

# Load the single CSV once, then carve out a held-out validation split.
# Pointing both 'train' and 'validation' at the same file would evaluate the
# model on the exact rows it was trained on, so the eval loss would say nothing
# about generalization.
raw_dataset = load_dataset('csv', data_files='mock_sentiment.csv')['train']

# Fixed seed so the split (and therefore the reported metrics) is reproducible.
splits = raw_dataset.train_test_split(test_size=0.2, seed=42)

# Keep the 'train' / 'validation' keys that the rest of the notebook indexes.
dataset = DatasetDict({'train': splits['train'], 'validation': splits['test']})

## Tokenize

In [None]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')


def tokenize_function(examples):
    """Tokenize the ``text`` column of a batch.

    Every sequence is truncated and padded to ``max_length=64`` so that all
    rows end up with the same length.
    """
    texts = examples['text']
    return tokenizer(
        texts,
        truncation=True,
        padding='max_length',
        max_length=64,
    )


# batched=True hands tokenize_function a batch of rows per call instead of
# one row at a time.
encoded_dataset = dataset.map(tokenize_function, batched=True)

## Load model

In [None]:
from transformers import AutoModelForSequenceClassification

# Same checkpoint name as the tokenizer above; num_labels=2 sizes the
# classification head for a two-class task.
model = AutoModelForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=2,
)

## Training

In [None]:
import inspect

from transformers import TrainingArguments, Trainer

# `evaluation_strategy` was renamed to `eval_strategy` in newer transformers
# releases and the old keyword was later removed, so passing it to an unpinned
# install raises TypeError. Pick whichever name the installed version accepts
# so the notebook runs on both old and new releases.
eval_strategy_kwarg = (
    'eval_strategy'
    if 'eval_strategy' in inspect.signature(TrainingArguments).parameters
    else 'evaluation_strategy'
)

training_args = TrainingArguments(
    output_dir='./results',
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    # Short fixed-length run: stop after 100 optimizer steps.
    max_steps=100,
    # Evaluate and log every 10 steps.
    eval_steps=10,
    logging_steps=10,
    # Do not write checkpoints to output_dir.
    save_strategy='no',
    **{eval_strategy_kwarg: 'steps'},
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=encoded_dataset['train'],
    eval_dataset=encoded_dataset['validation'],
)

trainer.train()

## Evaluate

In [None]:
# Run one evaluation pass with the trainer and keep the returned metrics.
# NOTE(review): no compute_metrics callback is passed to Trainer in this
# notebook, so this presumably reports loss/runtime figures only — confirm.
metrics = trainer.evaluate()
print(metrics)