In [None]:
#|default_exp callbacks
#|default_cls_lvl 3

In [None]:
#| nbflags skip_exec


In [None]:
#|hide
%reload_ext autoreload
%autoreload 2
%matplotlib inline

# callbacks

> Callbacks used by the BLURR library.

In [None]:
import os

# Export TOKENIZERS_PARALLELISM="false" into this process's environment before the
# libraries below are imported.
# NOTE(review): presumably this turns off parallelism inside HuggingFace tokenizers — confirm.
os.environ.update(TOKENIZERS_PARALLELISM="false")

In [None]:
#|export
import importlib, sys, torch
from typing import Any, Callable, Dict, List, Optional, Union, Type

from fastcore.all import *
from fastai.callback.all import *
from fastai.imports import *
from fastai.learner import *
from fastai.torch_core import *
from transformers import PreTrainedModel

In [None]:
#| echo: false
import gc, pdb

import GPUtil as GPU
from IPython.display import display
from fastai.text.all import *
from fastcore.test import *
from nbdev.showdoc import show_doc

from blurr.text.modeling.all import *

In [None]:
#|hide
#|cuda

# Position of the GPU in `GPU.getGPUs()`; used as the default device for `gpu_memory` below.
nvidia_smi_idx = 1

# Make CUDA device 1 the current torch device, then report which device is active.
torch.cuda.set_device(1)
print("Using GPU #{}: {}".format(torch.cuda.current_device(), torch.cuda.get_device_name()))


Using GPU #1: NVIDIA GeForce RTX 3080


## Gradient Checkpointing

In [None]:
#|export
class CheckpointingNotSupported(Exception):
    """Error signalling that a model cannot be used with gradient checkpointing."""

    def __init__(self, msg="Model does not support gradient checkpointing."):
        # Hand the (optionally overridden) message straight to the base exception.
        Exception.__init__(self, msg)

In [None]:
#|export
class GradientCheckpointing(Callback):
    """A fastai callback to enable gradient checkpointing for compatible HuggingFace models.

    The HuggingFace model is read from `self.model.hf_model`. Checkpointing is switched on
    in `before_fit` and switched off again in `after_fit`, but only if this callback was the
    one that switched it on; a model that already had checkpointing enabled before `fit`
    is left exactly as it was.
    """

    # True only while this callback is responsible for having enabled checkpointing.
    _enabled_by_callback = False

    def before_fit(self):
        """Enable gradient checkpointing on before_fit event.

        Raises:
            CheckpointingNotSupported: if the wrapped HuggingFace model reports that it
                does not support gradient checkpointing.
        """
        hf_model = self.model.hf_model

        # Start every fit from a clean slate so a previous (possibly aborted) fit cannot
        # leave a stale flag behind.
        self._enabled_by_callback = False

        # Check that huggingface model supports gradient checkpointing
        if not hf_model.supports_gradient_checkpointing:
            raise CheckpointingNotSupported()

        if not hf_model.is_gradient_checkpointing:
            hf_model.gradient_checkpointing_enable()
            self._enabled_by_callback = True

    def after_fit(self):
        """Disable gradient checkpointing on after_fit event, if this callback enabled it."""
        hf_model = self.model.hf_model

        # Only undo what `before_fit` did; never clobber a user-enabled setting.
        if self._enabled_by_callback and hf_model.is_gradient_checkpointing:
            hf_model.gradient_checkpointing_disable()
        self._enabled_by_callback = False

    @staticmethod
    def supported(model: PreTrainedModel):
        """Tests whether a HuggingFace `PreTrainedModel` supports gradient checkpointing."""
        return model.supports_gradient_checkpointing

In [None]:
def gpu_memory(device_idx=nvidia_smi_idx):
    """Return the `memoryUsed` value GPUtil reports for the GPU at `device_idx`.

    `device_idx` indexes the list returned by `GPU.getGPUs()` and defaults to the
    notebook-level `nvidia_smi_idx`.
    """
    all_gpus = GPU.getGPUs()
    selected_gpu = all_gpus[device_idx]
    return selected_gpu.memoryUsed

In [None]:
# step 1: load data (IMDB sample archive -> `texts.csv` as a DataFrame)
model_path = Path("models")
path = untar_data(URLs.IMDB_SAMPLE)
imdb_df = pd.read_csv(path.joinpath("texts.csv"))

In [None]:
# step 2: create Learner ("roberta-large" sequence classifier built from `imdb_df`, batch size 4)
learn = BlearnerForSequenceClassification.from_data(imdb_df, "roberta-large", dl_kwargs=dict(bs=4))

In [None]:
# Baseline: one epoch without the GradientCheckpointing callback
learn.fit_one_cycle(1, lr_max=1e-3)

# Read GPU memory usage straight after training, before anything is released
base_mem = gpu_memory()
print("{} MBs used.".format(base_mem))

# Clear gpu memory
reset_memory()

epoch,train_loss,valid_loss,f1_score,accuracy,time
0,0.305118,0.275218,0.914286,0.925,00:32


8059.0 MBs used.


In [None]:
# train with GradientCheckpointing
learn.fit_one_cycle(1, lr_max=1e-3, cbs=[GradientCheckpointing()])

check_mem = gpu_memory()
print(f"{check_mem} MBs used.")

# Compare the two readings directly (rather than a pre-computed boolean) so that a
# failure reports both measurements instead of just "False vs True".
assert check_mem < base_mem, (
    f"Expected lower GPU memory usage with GradientCheckpointing: "
    f"{check_mem} MBs used vs. baseline {base_mem} MBs."
)

epoch,train_loss,valid_loss,f1_score,accuracy,time
0,0.165095,0.181498,0.928177,0.935,00:44


2971.0 MBs used.


## Export -

In [None]:
#|hide
from nbdev import nbdev_export

# Run nbdev's export step for the project.
# NOTE(review): presumably this writes the cells tagged `#|export` above into the module
# named by `#|default_exp callbacks` — confirm against the nbdev docs.
nbdev_export()