<a href="https://colab.research.google.com/github/preetamjumech/LLM/blob/main/Fine_Tuning_LLM_5x_Faster_with_Unsloth_02_11_2024.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%%capture
!pip install unsloth "xformers==0.0.28.post2"
# Also get the latest nightly Unsloth!
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

In [2]:
from unsloth import FastLanguageModel
import torch
from trl import SFTTrainer
from transformers import TrainingArguments

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


In [3]:
from datasets import load_dataset

In [9]:
max_seq_length = 2048

In [4]:
dataset = load_dataset("imdb", split="train")

README.md:   0%|          | 0.00/7.81k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/21.0M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/20.5M [00:00<?, ?B/s]

unsupervised-00000-of-00001.parquet:   0%|          | 0.00/42.0M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating unsupervised split:   0%|          | 0/50000 [00:00<?, ? examples/s]

In [5]:
max_seq_length = 2048

In [6]:
dataset = load_dataset("imdb", split="train")

In [7]:
dataset

Dataset({
    features: ['text', 'label'],
    num_rows: 25000
})

In [10]:
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/mistral-7b-bnb-4bit",
    max_seq_length = max_seq_length,
    dtype=None,
    load_in_4bit=True
)

==((====))==  Unsloth 2024.10.7: Fast Mistral patching. Transformers = 4.44.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.0+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post2. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [11]:
model = FastLanguageModel.get_peft_model(
        model,
        r = 16,
        lora_alpha = 16,
        lora_dropout = 0,
        target_modules = ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
        bias = "none",
        use_gradient_checkpointing = True,
        random_state = 3407,
        max_seq_length = max_seq_length
)


Unsloth 2024.10.7 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [12]:
trainer = SFTTrainer(
    model = model,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    tokenizer = tokenizer,
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 10,
        max_steps = 60,
        fp16 = not torch.cuda.is_bf16_supported(),
        bf16 = torch.cuda.is_bf16_supported(),
        logging_steps = 1,
        output_dir = "unsloth-test",
        optim = "adamw_8bit",
        seed = 3407,
    ),
)


Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs


In [13]:
inputs = tokenizer(
    [
      "I really liked the movie because it shows emotions and talks humanity."
    ],
    return_tensors="pt",
).to("cuda")


In [14]:
inputs

{'input_ids': tensor([[    1,   315,  1528,  8232,   272,  5994,  1096,   378,  4370, 13855,
           304, 15066, 17676, 28723]], device='cuda:0'), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]], device='cuda:0')}

In [17]:
FastLanguageModel.for_inference(model)
outputs = model.generate(**inputs, max_new_tokens = 128, use_cache = True)

In [18]:
outputs

tensor([[    1,   315,  1528,  8232,   272,  5994,  1096,   378,  4370, 13855,
           304, 15066, 17676, 28723,   661,   349,   264,  1215,  1179,  5994,
         28723,   315,  1528,  8232,   272,  5994,  1096,   378,  4370, 13855,
           304, 15066, 17676, 28723,   661,   349,   264,  1215,  1179,  5994,
         28723,    13,    13, 28737,  1528,  8232,   272,  5994,  1096,   378,
          4370, 13855,   304, 15066, 17676, 28723,   661,   349,   264,  1215,
          1179,  5994, 28723,    13,    13, 28737,  1528,  8232,   272,  5994,
          1096,   378,  4370, 13855,   304, 15066, 17676, 28723,   661,   349,
           264,  1215,  1179,  5994, 28723,    13,    13, 28737,  1528,  8232,
           272,  5994,  1096,   378,  4370, 13855,   304, 15066, 17676, 28723,
           661,   349,   264,  1215,  1179,  5994, 28723,    13,    13, 28737,
          1528,  8232,   272,  5994,  1096,   378,  4370, 13855,   304, 15066,
         17676, 28723,   661,   349,   264,  1215,  

In [19]:
tokenizer.batch_decode(outputs)

['<s> I really liked the movie because it shows emotions and talks humanity. It is a very good movie. I really liked the movie because it shows emotions and talks humanity. It is a very good movie.\n\nI really liked the movie because it shows emotions and talks humanity. It is a very good movie.\n\nI really liked the movie because it shows emotions and talks humanity. It is a very good movie.\n\nI really liked the movie because it shows emotions and talks humanity. It is a very good movie.\n\nI really liked the movie because it shows emotions and talks humanity. It is a very good movie.\n\nI really liked the movie because it shows emotions and talks']

In [20]:
model.save_pretrained("lora_model")