In [None]:
import importlib
import importlib.util
import os
import sys

## Intro
The goal of this notebook is to show how to use an arbitrary model with the LeMa training loop.

In this case, we will adapt [nanoGPT](https://github.com/karpathy/nanoGPT) and train it using both the LeMa and HuggingFace training loops.

## Setup

This notebook assumes that you have already installed the `lema` package. If you haven't, you can install it by running `!pip install lema`.

We start by cloning the nanoGPT repository and adding it to our Python path.


In [None]:
module_folder = "/tmp/lema/nanoGPT"

# Clone the nanoGPT repo
if not os.path.isdir(module_folder):
    !mkdir -p $module_folder
    !git clone https://github.com/karpathy/nanoGPT $module_folder
else:
    print("nanoGPT already cloned!")

sys.path.append(module_folder)

Next we install the required dependencies. 

In [None]:
if importlib.util.find_spec("tiktoken") is not None:
    print("tiktoken is already installed!")
else:
    !pip install tiktoken

## Adapting nanoGPT model

In [None]:
import torch.nn.functional as F
from model import GPT, GPTConfig  # import from ~/nanoGPT/model.py

from oumi.core import registry


@registry.register("lema-nanoGPT", registry_type=registry.RegistryType.MODEL)
class LemaNanoGPT(GPT):
    """nanoGPT model adapted to return dict outputs from ``forward``.

    The class is registered in the model registry under the name
    "lema-nanoGPT".
    """

    def __init__(self, **kwargs):
        """Initializes the model from a default ``GPTConfig`` with ``bias=False``.

        Args:
            **kwargs: Accepted for call compatibility but not used; the model
                is always built from the default ``GPTConfig``.
        """
        gpt_config = GPTConfig()
        gpt_config.bias = False

        super().__init__(gpt_config)

    def forward(self, input_ids, labels=None, attention_mask=None):
        """Performs the forward pass and returns the outputs as a dict.

        Args:
            input_ids: Token ids, forwarded to the parent model as ``idx``.
            labels: Optional targets, forwarded to the parent model as
                ``targets``.
            attention_mask: Accepted for interface compatibility; not used.

        Returns:
            A dict with key "logits", plus key "loss" when the parent model
            returned a loss.
        """
        # Update the return format to be compatible with our Trainer.
        logits, loss = super().forward(idx=input_ids, targets=labels)
        outputs = {"logits": logits}
        # Check against None explicitly instead of truth-testing the loss:
        # a loss value of exactly 0 is falsy and would otherwise be dropped.
        if loss is not None:
            outputs["loss"] = loss
        return outputs

    def criterion(self):
        """Returns the criterion used for calculating the loss."""
        return F.cross_entropy

## Training

OK, now we are ready to train our model! We can start from the default GPT-2 config and edit it as needed.

In [None]:
import oumi
from oumi.core.configs import TrainerType, TrainingConfig

In [None]:
# Load the stock GPT-2 recipe as the baseline to customize.
config_path = "../configs/lema/gpt2.pt.mac.yaml"
config = TrainingConfig.from_yaml(config_path)

# Model settings: there is no custom tokenizer, so reuse HuggingFace's GPT-2
# tokenizer, and select our model by the exact name it was registered under.
config.model.tokenizer_name = "gpt2"
config.model.model_name = "lema-nanoGPT"

# Trainer settings: a short 10-step run that logs every step and applies
# no gradient accumulation.
config.training.trainer_type = TrainerType.LEMA
config.training.gradient_accumulation_steps = 1
config.training.logging_steps = 1
config.training.max_steps = 10

# Turn off the experiment trackers for this run.
config.training.enable_tensorboard = False
config.training.enable_wandb = False

In [None]:
# Launch training with the customized config built in the previous cell.
oumi.train(config)