# Finetune Hugging Face BERT with PyTorch Lightning

Running the cells below fine-tunes the model using the settings printed by the configuration cell.

In [2]:
import torch

import lightning.pytorch as pl
from lightning.pytorch.callbacks import EarlyStopping, ModelCheckpoint
from lightning.pytorch.loggers import CSVLogger
# from lightning.pytorch.profilers import PyTorchProfiler

from toxy_bot.ml.datamodule import AutoTokenizerDataModule
from toxy_bot.ml.module import SequenceClassificationModule
from toxy_bot.ml.utils import create_dirs
from toxy_bot.ml.config import Config, DataModuleConfig, ModuleConfig, TrainerConfig

from toxy_bot.ml.trainer import train


First, let's configure some basic settings: the model and dataset, the output directories, and the float32 matmul precision.

In [3]:
# --- Model and dataset settings, read from the project config classes ---
model_name, lr = ModuleConfig.model_name, ModuleConfig.learning_rate
dataset_name, batch_size = DataModuleConfig.dataset_name, DataModuleConfig.batch_size

# Echo the active settings so the run is self-describing in the saved output.
print(
    f"Model: {model_name}",
    f"Learning rate: {lr}",
    f"Dataset: {dataset_name}",
    f"Batch size: {batch_size}",
    sep="\n",
)

# --- Output locations ---
# Config.prof_dir is deliberately not read here: the profiler import at the
# top of the notebook is commented out, so no profiler directory is needed.
cache_dir, log_dir, ckpt_dir, perf_dir = (
    Config.cache_dir,
    Config.log_dir,
    Config.ckpt_dir,
    Config.perf_dir,
)
# (Re)create the directories up front so a previously deleted empty directory
# does not cause a failure later in the run.
create_dirs([cache_dir, log_dir, ckpt_dir, perf_dir])

# --- Float32 matmul precision ---
# Reference:
# https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html
torch.set_float32_matmul_precision("medium")

Model: google/bert_uncased_L-4_H-512_A-8
Learning rate: 3e-05
Dataset: anitamaxvim/jigsaw-toxic-comments
Batch size: 16


In [None]:
# Start fine-tuning through the project's `train` entry point
# (imported from toxy_bot.ml.trainer).
# NOTE(review): `perf=True` presumably switches on performance logging
# (cf. Config.perf_dir created in the configuration cell) -- confirm against
# the `train` signature.
# NOTE(review): the recorded output of this cell shows no GPU was available,
# so Lightning replaced the requested '16-mixed' precision with 'bf16-mixed'
# on CPU and progress was ~0.01 it/s; expect a very long run on similar
# hardware.
train(perf=True)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google/bert_uncased_L-4_H-512_A-8 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
/Users/dbozbay/Dev/toxy-bot/.venv/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/accelerator_connector.py:513: You passed `Trainer(accelerator='cpu', precision='16-mixed')` but AMP with fp16 is not supported on CPU. Using `precision='bf16-mixed'` instead.
Using bfloat16 Automatic Mixed Precision (AMP)
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Seed set to 42
[2025-03-27 16:28:36.163277] Data cache exists. Loading from cache.
Map: 100%|██████████| 135635/135635 [01:24<00:00, 1601.82 examples/s]
Map: 100%|██████████| 23936/23936 [00:14<00:00, 1664.67 examples/s]

  | Name      | Type                

Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]

/Users/dbozbay/Dev/toxy-bot/.venv/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


                                                                           

/Users/dbozbay/Dev/toxy-bot/.venv/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


Epoch 0:   0%|          | 3/8478 [09:01<425:00:35,  0.01it/s, v_num=1, train_loss=0.678]