### Connect to Google Drive

In [None]:
# Mount Google Drive on the Colab VM so the repo cloned there is reachable.
# force_remount=True re-mounts even if a previous mount is still present.
from google.colab import drive

drive.mount(mountpoint='/content/drive', force_remount=True)

Mounted at /content/drive


### Install Dependencies

In [None]:
!pip install datasets
!pip install -U transformers
!pip install bitsandbytes==0.43.2
!pip install trl==0.14
!pip install evaluate
!pip install rouge_score

Collecting datasets
  Downloading datasets-3.3.2-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Downloading datasets-3.3.2-py3-none-any.whl (485 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m485.4/485.4 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m12.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading multiprocess-0.70.16-py311-none-any.whl (143 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.5/143.5 kB[0m [31m12.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading 

# Optimised Code

In [None]:
cd drive/MyDrive/GitHub/finetuning-llm/

/content/drive/MyDrive/GitHub/finetuning-llm


In [None]:
# Display the current working directory (IPython automagic for %pwd) to
# confirm the notebook is running from the repo root.
pwd

'/content/drive/MyDrive/GitHub/finetuning-llm'

In [None]:
# Make the cloned repo importable so that `src.*` packages resolve.
# The path matches the Drive mount location used by the `cd` cell
# ("MyDrive", no space), and the guard avoids stacking duplicate entries
# when this cell is re-run.
import sys

REPO_ROOT = '/content/drive/MyDrive/GitHub/finetuning-llm'
if REPO_ROOT not in sys.path:
    sys.path.append(REPO_ROOT)

In [None]:
import torch
import os

# Project-local helpers from the cloned repo (requires the repo root to be
# importable / the working directory to be the repo root).
from src.train.callbacks import BatchSizeCallback, MetricsLoggingCallback
from src.train.finetune_helpers import ModelArguments, ScriptArguments
# NOTE(review): this wildcard import is presumably what brings
# `FinetuneSeq2SeqLLMs` and `data_collator` (used in later cells) into scope;
# consider importing those names explicitly.
from src.train.finetune_seq2seq import *

# Must be set before the W&B run is started by the trainer.
os.environ["WANDB_LOG_MODEL"] = "checkpoint"  # log all model checkpoints

In [None]:
# Model-side settings: base checkpoint, 4-bit quantisation options and the
# LoRA hyper-parameters, as consumed by the project's ModelArguments.
model_args = ModelArguments(
    model_name="google-t5/t5-small",
    use_4bit=True,
    use_nested_quant=True,
    bnb_4bit_compute_dtype="bfloat16",
    bnb_4bit_quant_dtype="nf4",
    lora_alpha=128,
    lora_dropout=0.1,
    lora_r=8
)

# Training-loop settings: batch sizes, optimiser/scheduler, dataset, and the
# shared 50-step cadence for logging, evaluation and checkpointing.
script_args = ScriptArguments(
    per_device_train_batch_size=64,
    per_device_eval_batch_size=64,
    auto_find_batch_size=True,
    # gradient_accumulation_steps=4,
    # eval_accumulation_steps=2,
    weight_decay=0.01,
    max_seq_length=512,
    dataset_name="4DR1455/finance_questions",
    bf16=True,
    learning_rate=2e-5,
    optim='adafactor',
    lr_scheduler_type='cosine',
    packing=True,
    num_train_epochs=9,
    # max_steps=20,
    save_steps=50,
    logging_steps=50,
    eval_steps=50,
    warmup_steps=50,
    eval_strategy='steps',
    run_name="Google-T5-Small-v3.0",
    report_to="wandb",
    save_safetensors=True,
    label_names=['labels'],
    load_best_model_at_end=True,
    dataloader_num_workers=10,
)

seq2seq_llm = FinetuneSeq2SeqLLMs(model_args=model_args, script_args=script_args)

tokenizer = seq2seq_llm.get_tokenizer()
peft_config = seq2seq_llm.get_peft_config()
peft_model = seq2seq_llm.get_model(peft_config)

# Import the collator factory under an alias before binding its result to
# `data_collator`. The original `data_collator = data_collator(...)` replaced
# the factory with its own return value, so re-running this cell called the
# collator instance instead of the factory. Later cells still read the
# instance from `data_collator`.
from src.train.finetune_seq2seq import data_collator as build_data_collator

data_collator = build_data_collator(tokenizer=tokenizer, peft_model=peft_model)

# sample=False is passed through unchanged; presumably it selects the full
# dataset rather than a subsample - confirm against tokenize_split_dataset.
train_set, eval_set, test_set = seq2seq_llm.tokenize_split_dataset(sample=False)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

`low_cpu_mem_usage` was None, now default to True since model is quantized.


model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

trainable params: 589,824 || all params: 61,096,448 || trainable%: 0.9654


README.md:   0%|          | 0.00/163 [00:00<?, ?B/s]

finance_questions_dataset.json:   0%|          | 0.00/53.8M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/53937 [00:00<?, ? examples/s]

Map:   0%|          | 0/17979 [00:00<?, ? examples/s]

In [None]:
# Start fine-tuning with the PEFT-wrapped model, its LoRA config and the
# data collator prepared in the previous cell.
seq2seq_llm.train(
    data_collator=data_collator,
    peft_config=peft_config,
    peft_model=peft_model,
)

  trainer = SFTTrainer(


Generating train split: 0 examples [00:00, ? examples/s]

Generating train split: 0 examples [00:00, ? examples/s]

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mleonsunwl[0m to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss,Validation Loss,R1,R2,Rl,Rlsum
50,0.4868,0.447295,0.0131,0.0002,0.0129,0.0129
100,0.3515,0.312866,0.0136,0.0,0.0136,0.0136
150,0.2713,0.267641,0.0293,0.0006,0.0291,0.029
200,0.2243,0.223328,0.0728,0.0023,0.0703,0.0701
250,0.1913,0.18933,0.1258,0.0067,0.1201,0.1201
300,0.1735,0.169333,0.1869,0.0183,0.1748,0.1748
350,0.1648,0.154907,0.2338,0.0335,0.2211,0.2212
400,0.1573,0.143812,0.2705,0.0541,0.2583,0.2582
450,0.1544,0.138955,0.2899,0.0664,0.2781,0.2778
500,0.152,0.138134,0.2926,0.0681,0.2805,0.2803


Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

[34m[1mwandb[0m: Adding directory to artifact (./results/google-t5/t5-small/checkpoint-50)... Done. 0.0s


[34m[1mwandb[0m: Adding directory to artifact (./results/google-t5/t5-small/checkpoint-100)... Done. 0.0s


[34m[1mwandb[0m: Adding directory to artifact (./results/google-t5/t5-small/checkpoint-150)... Done. 0.0s


[34m[1mwandb[0m: Adding directory to artifact (./results/google-t5/t5-small/checkpoint-200)... Done. 0.0s


[34m[1mwandb[0m: Adding directory to artifact (./results/google-t5/t5-small/checkpoint-250)... Done. 0.0s


[34m[1mwandb[0m: Adding directory to artifact (./results/google-t5/t5-small/checkpoint-300)... Done. 0.0s


[34m[1mwandb[0m: Adding directory to artifact (./results/google-t5/t5-small/checkpoint-350)... Done. 0.0s


[34m[1mwandb[0m: Adding directory to artifact (./results/google-t5/t5-small/checkpoint-400)... Done. 0.0s


[34m[1mwandb[0m: Adding directory to artifact (./results/google-t5/t5-small/checkpoint-450)... Done. 0.0s


[34m[1mwandb[0m: Adding directory to artifact (./results/google-t5/t5-small/checkpoint-500)... Done. 0.0s
[34m[1mwandb[0m: Adding directory to artifact (./results/google-t5/t5-small/checkpoint-504)... Done. 0.0s


In [None]:
# Close the active Weights & Biases run now that training has finished.
import wandb

wandb.finish()

0,1
eval/R1,▁▁▁▂▄▅▇▇██
eval/R2,▁▁▁▁▂▃▄▇██
eval/RL,▁▁▁▃▄▅▆▇██
eval/RLsum,▁▁▁▂▄▅▆▇██
eval/loss,█▅▄▃▂▂▁▁▁▁
eval/runtime,█▁▁▁▁▂▂▃▂▂
eval/samples_per_second,▁███▇▇▇▆▇▇
eval/steps_per_second,▁███▇▇▇▆▇▇
train/epoch,▁▁▂▂▃▃▃▃▄▄▅▅▆▆▆▆▇▇███
train/global_step,▁▁▂▂▃▃▃▃▄▄▅▅▆▆▆▆▇▇███

0,1
eval/R1,0.2926
eval/R2,0.0681
eval/RL,0.2805
eval/RLsum,0.2803
eval/loss,0.13813
eval/runtime,7.8857
eval/samples_per_second,228.387
eval/steps_per_second,3.678
total_flos,1.74975256363008e+16
train/epoch,8.84956


# Fusing Models

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModelForSeq2SeqLM
from peft import PeftModel, PeftConfig, AutoPeftModelForSeq2SeqLM
import tqdm as notebook_tqdm

In [None]:
# Display the current working directory (IPython automagic for %pwd); the
# relative model paths used below are resolved against it.
pwd

'/content/drive/MyDrive/GitHub/finetuning-llm'

In [None]:
# Seq2seq base checkpoints this notebook has been used with.
SEQUENCE_MODELS = [
        "google-t5/t5-base",
        "google-t5/t5-small"
        ]

# Base checkpoint whose tokenizer is saved alongside the fused weights.
base_model_name = "google-t5/t5-small"

# Only tokenizer options are passed here. `load_in_8bit` and `device_map` are
# model-loading arguments and do not belong on a tokenizer, so they were dropped.
tokenizer = AutoTokenizer.from_pretrained(
        base_model_name,
        use_fast=True,
        padding_side="right"
        )
# NOTE(review): this overrides the tokenizer's pad token with EOS; confirm it
# matches the tokenizer configuration that was used during fine-tuning.
tokenizer.pad_token = tokenizer.eos_token

# Load the LoRA-adapted model from the local adapter directory (path is
# relative to the current working directory).
peft_model_local_path = "models/google-t5/t5-small/2025-02-13/02-44-10"
model = AutoPeftModelForSeq2SeqLM.from_pretrained(peft_model_local_path)

# Output directory for the fused model and its tokenizer.
output_dir = f"{peft_model_local_path}/fused_model"


In [None]:
# Fuse the LoRA weights into the base model and drop the adapter wrappers.
# Guarded so the cell can be re-run safely: after the first merge `model` is
# no longer a PeftModel and has no adapters left to merge.
if isinstance(model, PeftModel):
    model = model.merge_and_unload()

In [None]:
# Write the tokenizer files next to the fused weights; the returned tuple of
# written file paths is displayed as the cell output.
tokenizer.save_pretrained(save_directory=output_dir)

('models/google-t5/t5-small/2025-02-13/02-44-10/fused_model/tokenizer_config.json',
 'models/google-t5/t5-small/2025-02-13/02-44-10/fused_model/special_tokens_map.json',
 'models/google-t5/t5-small/2025-02-13/02-44-10/fused_model/spiece.model',
 'models/google-t5/t5-small/2025-02-13/02-44-10/fused_model/added_tokens.json',
 'models/google-t5/t5-small/2025-02-13/02-44-10/fused_model/tokenizer.json')

In [None]:
# Persist the model into the same directory as the tokenizer.
model.save_pretrained(save_directory=output_dir)