# Migrate from PyTorch to Accelerate

### DistilBERT example

## PyTorch example

### 1. Load Model & Datasets

In [None]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from datasets import load_dataset

from transformers import set_seed

# Seed the random number generators before anything stochastic happens, so that
# a "Restart Kernel -> Run All" gives the same training run every time.
# NOTE(review): the seed has to be set before `from_pretrained` below if that
# call creates any randomly initialised weights (expected for a fresh
# classification head) -- keep this at the top of the cell.
SEED = 42
set_seed(SEED)

# Identifiers passed to `from_pretrained` and `load_dataset` below.
model_id = "distilbert-base-uncased"
dataset_id = "emotion"

# `num_labels=6` sizes the sequence-classification output for six classes.
model = AutoModelForSequenceClassification.from_pretrained(model_id, num_labels=6)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Later cells index the result with "train" and "validation".
dataset = load_dataset(dataset_id)

def preprocess(sample):
  """Tokenize `sample["text"]` and expose the label under the key "labels".

  The text is passed to the module-level `tokenizer` with `truncation=True`.
  When `sample` contains a "label" entry, its value is copied unchanged into
  the tokenizer output as "labels"; otherwise the tokenizer output is returned
  as is.

  Args:
    sample: mapping with a "text" entry and, optionally, a "label" entry.
      It is applied through `dataset.map(..., batched=True)`, so it
      presumably receives a batch of rows rather than a single row.

  Returns:
    The tokenizer output, extended with "labels" when a label was present.
  """
  encoded = tokenizer(sample["text"], truncation=True)
  # Nothing to attach when the input carries no label.
  if "label" not in sample:
    return encoded
  encoded["labels"] = sample["label"]
  return encoded

# Run `preprocess` over the dataset in batches. The column names of the train
# split are removed, so only the fields returned by `preprocess` remain.
dataset = dataset.map(
  preprocess,
  batched=True,
  remove_columns=dataset["train"].column_names,
)

# Quick sanity check of the processed data.
print(f"Train dataset size: {len(dataset['train'])}")
print(f"Dataset columns: {dataset['train'].column_names}")
print(f"Validation dataset size: {len(dataset['validation'])}")


### 2. Hyperparameters, Dataloader, Optimizer

In [10]:
from torch.optim import AdamW
from torch.utils.data import DataLoader
from transformers import get_linear_schedule_with_warmup,DataCollatorWithPadding

###### Hyperparameters ######
NUM_EPOCHS = 3
LEARNING_RATE = 3e-5
TRAIN_BATCH_SIZE = 64
EVAL_BATCH_SIZE = 64

###### Data Loaders ######
# The collator builds each batch with the tokenizer; `pad_to_multiple_of=8`
# asks for padded lengths that are a multiple of 8.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer, pad_to_multiple_of=8)

# Training batches are shuffled; evaluation batches keep the dataset order.
train_dataloader = DataLoader(
  dataset["train"],
  batch_size=TRAIN_BATCH_SIZE,
  shuffle=True,
  collate_fn=data_collator,
)
eval_dataloader = DataLoader(
  dataset["validation"],
  batch_size=EVAL_BATCH_SIZE,
  shuffle=False,
  collate_fn=data_collator,
)

###### Optimizer ######
optimizer = AdamW(model.parameters(), lr=LEARNING_RATE)

# Linear schedule without warmup. The total step count is one step per
# training batch for every epoch.
lr_scheduler = get_linear_schedule_with_warmup(
  optimizer=optimizer,
  num_warmup_steps=0,
  num_training_steps=NUM_EPOCHS * len(train_dataloader),
)

### 3. Vanilla PyTorch Training

In [11]:
import torch
import evaluate
from tqdm import tqdm
import logging

# Train on the GPU when PyTorch reports one, otherwise on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# One pass over `train_dataloader` per epoch.
for epoch in range(NUM_EPOCHS):
  model.train()
  # The progress bar wraps the dataloader and is labelled with the 1-based epoch.
  with tqdm(train_dataloader) as progress_bar:
    progress_bar.set_description(f"Epoch {epoch+1}")
    for batch in progress_bar:
      # Clear the gradients left over from the previous step.
      optimizer.zero_grad()
      # Put every tensor of the batch on the same device as the model.
      model_inputs = {name: tensor.to(device) for name, tensor in batch.items()}
      # Forward pass; the loss is read from the model output.
      outputs = model(**model_inputs)
      loss = outputs.loss
      # Backward pass, then advance the optimizer and the LR schedule.
      loss.backward()
      optimizer.step()
      lr_scheduler.step()
      # Show the current batch loss next to the progress bar.
      progress_bar.set_postfix(loss=float(loss))



Epoch 0: 100%|██████████| 250/250 [01:12<00:00,  3.47it/s, loss=0.239]
Epoch 1: 100%|██████████| 250/250 [01:16<00:00,  3.28it/s, loss=0.157] 
Epoch 2: 100%|██████████| 250/250 [01:19<00:00,  3.15it/s, loss=0.0897]


### 4. Evaluate model

In [28]:
# Evaluate the model on `eval_dataloader` and report accuracy.
metric = evaluate.load("accuracy")
model.eval()
with tqdm(eval_dataloader) as pbar:
  for batch in pbar:
    # Move every tensor of the batch to the device used for the model.
    inputs = {k: v.to(device) for k, v in batch.items()}
    # Forward pass without gradient tracking.
    with torch.no_grad():
      outputs = model(**inputs)
    # Predicted class = index of the largest logit along the last dimension.
    predictions = outputs.logits.argmax(dim=-1)
    # Accumulate this batch; references come from the original batch's labels.
    metric.add_batch(references=batch["labels"].tolist(), predictions=predictions.tolist())
cur_metric = metric.compute()
# Fixed two-decimal formatting: the raw product `accuracy * 100` can otherwise
# print binary floating-point noise (e.g. 56.99999999999999).
print(f"Accuracy: {cur_metric['accuracy']*100:.2f}%")


100%|██████████| 63/63 [00:04<00:00, 15.60it/s]

94.0%



