In [1]:
from accelerate import Accelerator
from transformers import AdamW, AutoTokenizer, AutoModelForCausalLM, get_scheduler
from torch.utils.data import DataLoader
from datasets import load_dataset
from tqdm import tqdm

In [2]:
# Load the GPT-2 tokenizer (the model itself is loaded inside training_function).
tokenizer = AutoTokenizer.from_pretrained("gpt2")
# Reuse the end-of-sequence token as the padding token so that tokenizer calls
# that request padding have a pad token available -- presumably the "gpt2"
# tokenizer defines none of its own; TODO confirm.
tokenizer.pad_token = tokenizer.eos_token

In [9]:
# Tokenization helper handed to `dataset.map`
def tokenize_function(examples):
    """Tokenize the ``"text"`` entry of ``examples`` with the module-level ``tokenizer``.

    Sequences are truncated and padded so that each one is exactly 512 tokens
    long.  The tokenizer's output is returned unchanged.
    """
    return tokenizer(
        examples["text"],
        max_length=512,
        padding="max_length",
        truncation=True,
    )

# Fetch the training split of the raw WikiText-2 corpus.
dataset = load_dataset("wikitext", "wikitext-2-raw-v1", split="train")

# Tokenize in batches, dropping the raw "text" column, then have the dataset
# hand back torch tensors when indexed.
tokenized_dataset = dataset.map(
    tokenize_function,
    batched=True,
    remove_columns=["text"],
)
tokenized_dataset.set_format(type="torch")

Map:   0%|          | 0/36718 [00:00<?, ? examples/s]

In [4]:
def training_function():
    """Fine-tune GPT-2 as a causal language model on ``tokenized_dataset``.

    Takes no arguments and returns ``None`` so that it can be handed directly to
    ``accelerate.notebook_launcher``; each launched process runs this function.
    It reads the module-level ``tokenized_dataset`` and trains for 3 epochs with
    batch size 8, AdamW (lr=3e-5) and a linear learning-rate decay without warmup.

    Padding handling: every row is padded to a fixed length with the EOS token,
    so the attention mask is forwarded to the model and padded positions are
    excluded from the loss.  Rows with fewer than two real tokens provide no
    next-token target and are dropped before training.
    """
    # Function-scope import: torch is not imported at the top of the notebook.
    import torch

    model = AutoModelForCausalLM.from_pretrained("gpt2")

    # Initialize the accelerator
    accelerator = Accelerator()

    # transformers.AdamW is deprecated in favour of the PyTorch implementation.
    # eps / weight_decay are pinned to the defaults of the transformers class so
    # the update rule stays the same as before the switch.
    optimizer = torch.optim.AdamW(
        model.parameters(), lr=3e-5, eps=1e-6, weight_decay=0.0
    )

    def _has_next_token_target(rows):
        # A row needs at least two non-padding tokens to yield one (input, target) pair.
        return [int(torch.as_tensor(mask).sum()) > 1 for mask in rows["attention_mask"]]

    # Drop rows that are (almost) pure padding: once padded positions are masked
    # out of the labels, a batch made only of such rows would have no valid
    # target and produce a NaN loss.  The main process goes first so the other
    # processes can reuse its cached result when caching is available.
    with accelerator.main_process_first():
        train_dataset = tokenized_dataset.filter(_has_next_token_target, batched=True)

    dataloader = DataLoader(train_dataset, batch_size=8, shuffle=True)

    # Let accelerate wrap the objects for the current device / process layout.
    dataloader, model, optimizer = accelerator.prepare(
        dataloader, model, optimizer
    )

    num_epochs = 3
    # len(dataloader) is taken after prepare(), i.e. it is the per-process step
    # count, which matches the un-prepared scheduler stepping once per batch below.
    num_training_steps = num_epochs * len(dataloader)
    lr_scheduler = get_scheduler(
        "linear",
        optimizer=optimizer,
        num_warmup_steps=0,
        num_training_steps=num_training_steps
    )

    # Only one process per machine draws the bar; otherwise every launched
    # process would print its own copy.
    progress_bar = tqdm(
        range(num_training_steps),
        disable=not accelerator.is_local_main_process,
    )
    model.train()

    for epoch in range(num_epochs):
        for batch in dataloader:
            input_ids = batch["input_ids"]
            attention_mask = batch["attention_mask"]
            # pad_token == eos_token, so padding must be identified through the
            # attention mask rather than the token id.  -100 is the label value
            # the causal-LM loss ignores.
            labels = input_ids.masked_fill(attention_mask == 0, -100)

            outputs = model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                labels=labels,
            )
            loss = outputs.loss
            accelerator.backward(loss)

            optimizer.step()
            lr_scheduler.step()
            optimizer.zero_grad()
            progress_bar.update(1)


In [5]:
from accelerate import notebook_launcher

# Run training_function through accelerate's notebook launcher, asking for 3 processes.
# NOTE: the recorded run of this cell failed with the ValueError shown in its output.
# Per that message, no cell executed earlier in the kernel session may have used a
# torch.cuda instruction; restart the kernel and re-run the cells top to bottom
# before launching.
notebook_launcher(training_function, num_processes=3)

ValueError: To launch a multi-GPU training from your notebook, you need to avoid running any instruction using `torch.cuda` in any cell. Restart your notebook and make sure no cells use any CUDA function.