Fine-tuning with transformers
===

Reminding myself about some of the details of fine-tuning.

 - FLAN-T5: https://huggingface.co/google/flan-t5-base
 - T5: https://huggingface.co/docs/transformers/model_doc/t5
 

In [32]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import transformers
from accelerate import Accelerator, DistributedType
from transformers import T5ForConditionalGeneration, T5Tokenizer

In [9]:
# Locate the project's data directory (a sibling of the current working directory)
# and fail fast if it is missing.
data_dir = Path.cwd().joinpath("..", "data").resolve()
assert data_dir.exists()

# Standard sub-folders, one per data-processing stage.
raw_data_dir, interim_data_dir, processed_data_dir = (
    data_dir / stage for stage in ("raw", "interim", "processed")
)

In [3]:
# Pick which FLAN-T5 checkpoint to load; exactly one assignment should be uncommented.
# The trailing notes give the approximate size of each checkpoint's weights.
# t5_model_name = "google/flan-t5-base"  # weights <1G
t5_model_name = "google/flan-t5-large"  # weights ~3G
# t5_model_name = "google/flan-t5-xl"  # weights ~12G
# t5_model_name = "google/flan-t5-xxl"  # weights ~46G
# Tokenizer and model are loaded from the same checkpoint name so they stay in sync.
tokenizer = T5Tokenizer.from_pretrained(t5_model_name)
# device_map="auto" leaves device placement to the library — presumably via accelerate; TODO confirm.
model = T5ForConditionalGeneration.from_pretrained(t5_model_name, device_map="auto")

In [4]:
# The task is expressed as a plain-text prefix on the prompt.
input_text = "translate English to French: How old are you?"
# Send the token ids to wherever the model's weights were placed (device_map="auto"
# picks the device at load time) rather than assuming a CUDA GPU is present.
input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(model.device)
input_ids

tensor([[13959,  1566,    12,  2379,    10,   571,   625,    33,    25,    58,
             1]], device='cuda:0')

In [5]:
%%time
# Generate a continuation of at most 20 new tokens for the prompt encoded above.
outputs = model.generate(input_ids, max_new_tokens=20)
# outputs[0] is the first (only) sequence in the batch; decode() is called without
# skipping special tokens, so markers such as <pad> and </s> appear in the printed text.
print(tokenizer.decode(outputs[0]))

<pad> <unk> quelle âge avez-vous?</s>
CPU times: user 783 ms, sys: 179 ms, total: 963 ms
Wall time: 3.59 s


In [10]:
# Write the model's config and weights to a folder under the interim data directory.
# NOTE(review): the folder name says "flat-t5" — possibly a typo for "flan-t5" — and hard-codes
# "large" regardless of t5_model_name; any rename must also be applied wherever this path is read.
model.save_pretrained(interim_data_dir / "flat-t5-large-mimic")

In [12]:
# Show the on-disk size of each file in the saved-model folder ({...} is IPython interpolation).
!du -h {interim_data_dir / "flat-t5-large-mimic"}/*

24K	/panfs/jay/groups/25/lana/levon003/repos/scratchpad/finetune/data/interim/flat-t5-large-mimic/config.json
24K	/panfs/jay/groups/25/lana/levon003/repos/scratchpad/finetune/data/interim/flat-t5-large-mimic/generation_config.json
3.6G	/panfs/jay/groups/25/lana/levon003/repos/scratchpad/finetune/data/interim/flat-t5-large-mimic/pytorch_model.bin


In [23]:
# Baseline before any fine-tuning: see how the pretrained model handles a string-reversal prompt.
input_text = 'Say "Zachary Levonian" backwards'
# Use the model's own device instead of a hard-coded "cuda" so the cell also runs
# when device_map="auto" placed the weights somewhere else (e.g. CPU-only machines).
input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(model.device)
outputs = model.generate(input_ids, max_new_tokens=20)
print(tokenizer.decode(outputs[0]))

<pad> Zachary Levonian</s>


In [27]:
# One supervised example: the prompt and the target text the model should learn to produce.
input_text = 'Say "Zachary Levonian" backwards'
output_text = "nainoveL yrahcaZ"
# Tokenize prompt and target, placing both tensors on the model's device explicitly so the
# forward pass does not depend on the library relocating CPU inputs behind the scenes.
input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(model.device)
labels = tokenizer(output_text, return_tensors="pt").input_ids.to(model.device)

In [30]:
# Forward pass with labels supplied, so the returned output carries a training loss.
loss = model(input_ids=input_ids, labels=labels).loss
# .item() converts the loss to a plain Python number for display.
loss.item()

5.19366979598999

In [36]:
# Create the accelerate helper; gradient_accumulation_steps=1 is passed explicitly
# (presumably meaning no accumulation across batches — TODO confirm against the accelerate docs).
accelerator = Accelerator(gradient_accumulation_steps=1)
# Display the detected runtime state (distributed mode, process count, device, mixed precision).
accelerator.state

Distributed environment: NO
Num processes: 1
Process index: 0
Local process index: 0
Device: cuda

Mixed precision type: no

In [37]:
# Synchronization point across processes. The state shown for this run reports a single
# process, so this is presumably a no-op here — TODO confirm.
accelerator.wait_for_everyone()  # probably unnecessary