In [None]:
# |default_exp callbacks
# |default_cls_lvl 3

In [None]:
# |hide
%reload_ext autoreload
%autoreload 2

# callbacks

> Callbacks used by the BLURR library.


In [None]:
# |export
from __future__ import annotations

import os, importlib, sys

import torch

from fastcore.all import *
from fastai.callback.all import *
from fastai.imports import *
from fastai.learner import *
from fastai.torch_core import *
from transformers import PreTrainedModel

In [None]:
# | echo: false
import gc, pdb

import GPUtil as GPU
from IPython.display import display
from fastai.text.all import *
from fastcore.test import *
from nbdev import nbdev_export
from nbdev.showdoc import show_doc

from blurr.text.modeling.all import *

In [None]:
# |export
# Set the `TOKENIZERS_PARALLELISM` environment variable to "false" for this process.
# NOTE(review): presumably this keeps HuggingFace tokenizers from using their own
# parallelism alongside DataLoader workers — confirm.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [None]:
# |hide
# |cuda
# Make CUDA device 1 the current device, then report the current device's index and name.
# NOTE(review): the device index is hard-coded, so this cell requires a machine that
# exposes at least two CUDA devices.
torch.cuda.set_device(1)
print(f"Using GPU #{torch.cuda.current_device()}: {torch.cuda.get_device_name()}")

Using GPU #1: GeForce GTX 1080 Ti


## Gradient Checkpointing

In [None]:
# |export
class CheckpointingNotSupported(Exception):
    """Error signalling that a model cannot use gradient checkpointing.

    Args:
        msg: Text stored as the exception message; a generic explanation is used
            when the caller does not supply one.
    """

    def __init__(self, msg="Model does not support gradient checkpointing."):
        # Hand the (possibly customised) message to the base exception unchanged.
        Exception.__init__(self, msg)

In [None]:
# |export
class GradientCheckpointing(Callback):
    """A fastai callback to enable gradient checkpointing for compatible HuggingFace models.

    Checkpointing is switched on for the duration of `fit`. When training ends the model is
    returned to the state it was in beforehand: if checkpointing was already active before
    `fit` started, it is left active rather than being switched off.
    """

    # True only between `before_fit` and `after_fit`, and only when this callback was the
    # one that switched checkpointing on (so `after_fit` knows whether to undo it).
    _enabled_by_callback = False

    def before_fit(self):
        """Enable gradient checkpointing on before_fit event.

        Raises:
            CheckpointingNotSupported: If the wrapped HuggingFace model reports that it does
                not support gradient checkpointing.
        """
        hf_model = self.model.hf_model

        # Check that huggingface model supports gradient checkpointing
        if not hf_model.supports_gradient_checkpointing:
            raise CheckpointingNotSupported()

        # Only take ownership of the setting when it was off to begin with.
        self._enabled_by_callback = not hf_model.is_gradient_checkpointing
        if self._enabled_by_callback:
            hf_model.gradient_checkpointing_enable()

    def after_fit(self):
        """Disable gradient checkpointing on after_fit event, if this callback enabled it."""
        hf_model = self.model.hf_model

        # Leave checkpointing alone when it was already on before `fit` began.
        if self._enabled_by_callback and hf_model.is_gradient_checkpointing:
            hf_model.gradient_checkpointing_disable()
        self._enabled_by_callback = False

    @staticmethod
    def supported(model: PreTrainedModel) -> bool:
        """Tests whether a HuggingFace `PreTrainedModel` supports gradient checkpointing."""
        return model.supports_gradient_checkpointing

We'll use a small sample of the IMDB dataset for testing.

In [None]:
# Fetch the IMDB sample and read its `texts.csv` into a DataFrame for the runs below.
path = untar_data(URLs.IMDB_SAMPLE)
# NOTE(review): `model_path` is not referenced by any other cell visible in this notebook.
model_path = Path("models")
imdb_df = pd.read_csv(path / "texts.csv")

Let's look at memory consumption without `GradientCheckpointing`

In [None]:
# Default position, within the list returned by `GPU.getGPUs()`, of the GPU to sample.
# NOTE(review): this differs from the CUDA index (1) passed to `torch.cuda.set_device`
# earlier; presumably GPUtil orders devices differently on the author's machine — confirm.
nvidia_smi_idx = 2


def gpu_memory(device_idx=nvidia_smi_idx):
    """Return the `memoryUsed` reading GPUtil reports for the GPU at `device_idx`.

    Args:
        device_idx: Position of the GPU in the list returned by `GPU.getGPUs()`.
    """
    detected_gpus = GPU.getGPUs()
    selected_gpu = detected_gpus[device_idx]
    return selected_gpu.memoryUsed

In [None]:
# Baseline run: build a "roberta-large" sequence classifier (batch size 4) and train it
# for one cycle without any extra callbacks.
learn = BlearnerForSequenceClassification.from_data(
    imdb_df, "roberta-large", dl_kwargs={"bs": 4}
)

learn.fit_one_cycle(1, lr_max=1e-3)

# Record the GPU memory reading after training; it is the reference value for the
# comparison against the gradient-checkpointing run.
base_mem = gpu_memory()
print(f"{base_mem} MBs used.")

# NOTE(review): presumably releases the GPU memory held by `learn` so the next run
# starts from a clean state — confirm against `reset_memory`'s implementation.
reset_memory(learn)

epoch,train_loss,valid_loss,f1_score,accuracy,time
0,0.341047,0.237419,0.918033,0.925,00:57


9499.0 MBs used.


Let's look at memory consumption *with* `GradientCheckpointing`

In [None]:
# Same model and settings as the baseline run, but trained with the
# `GradientCheckpointing` callback attached for the duration of the fit.
learn = BlearnerForSequenceClassification.from_data(
    imdb_df, "roberta-large", dl_kwargs={"bs": 4}
)

learn.fit_one_cycle(1, lr_max=1e-3, cbs=[GradientCheckpointing()])

# Record the GPU memory reading after training with checkpointing enabled.
check_mem = gpu_memory()
print(f"{check_mem} MBs used.")

# The checkpointed run must report strictly less GPU memory than the baseline run.
test_eq(base_mem > check_mem, True)
# NOTE(review): presumably releases the GPU memory held by `learn` — confirm against
# `reset_memory`'s implementation.
reset_memory(learn)

epoch,train_loss,valid_loss,f1_score,accuracy,time
0,0.299704,0.2229,0.920455,0.93,01:22


4297.0 MBs used.


## Export -

In [None]:
# |hide
# Run nbdev's export step. NOTE(review): presumably this writes the cells tagged
# `# |export` to the module named by the `default_exp` directive at the top — confirm.
nbdev_export()