In [42]:
from transformers import AutoModelForCausalLM, AutoTokenizer, DataCollatorForLanguageModeling

checkpoint = "meta-llama/Llama-3.2-1B-Instruct"

model = AutoModelForCausalLM.from_pretrained(checkpoint)
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

In [55]:
from datasets import load_dataset, DatasetDict, Dataset

raw_train_dataset = load_dataset("json", data_files="./data/pokemon-train.json")
raw_eval_dataset = load_dataset("json", data_files="./data/pokemon-eval.json")

raw_dataset = DatasetDict({
    "train": raw_train_dataset["train"],
    "eval": raw_eval_dataset["train"]
})

In [61]:
raw_dataset["train"][0]

{'prompt': 'Tell me about Bulbasaur.',
 'response': {'Abilities': ['Overgrow', 'Chlorophyll'],
  'Flavor Text': "Bulbasaur can be seen napping in bright sunlight. There is a seed on its back. By soaking up the sun's rays, the seed grows into a large plant.",
  'Height': '0.7 meters',
  'Name': 'Bulbasaur',
  'Pokédex Number': 1,
  'Type': ['Grass', 'Poison'],
  'Weight': '6.9 kg'}}

In [60]:
context_length = 128

def tokenize_data(dataset):
    return tokenizer(dataset["prompt"], truncation=True, max_length=context_length,
    return_overflowing_tokens=True,
    return_length=True, )

tokenized_dataset = raw_dataset.map(tokenize_data, batched=True, remove_columns=["prompt"])

Map: 100%|██████████| 55/55 [00:00<00:00, 1584.15 examples/s]
Map: 100%|██████████| 13/13 [00:00<00:00, 4193.66 examples/s]


In [None]:
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(output_dir="poke-training",
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    eval_steps=5_000,
    logging_steps=5_000,
    gradient_accumulation_steps=8,
    num_train_epochs=1,
    weight_decay=0.1,
    warmup_steps=1_000,
    lr_scheduler_type="cosine",
    learning_rate=5e-4,
    save_steps=5_000)

trainer = Trainer(
    model=model,
    tokenizer=tokenizer,
    data_collator=data_collator,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["eval"]
)

ValueError: fp16 mixed precision requires a GPU (not 'mps').