In [1]:
%%capture
!pip install unsloth
# Also get the latest nightly Unsloth!
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

In [2]:
!pip install wandb --upgrade

Collecting wandb
  Downloading wandb-0.18.3-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.7 kB)
Collecting docker-pycreds>=0.4.0 (from wandb)
  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl.metadata (1.8 kB)
Collecting gitpython!=3.1.29,>=1.0.0 (from wandb)
  Downloading GitPython-3.1.43-py3-none-any.whl.metadata (13 kB)
Collecting sentry-sdk>=1.0.0 (from wandb)
  Downloading sentry_sdk-2.16.0-py2.py3-none-any.whl.metadata (9.8 kB)
Collecting setproctitle (from wandb)
  Downloading setproctitle-1.3.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.9 kB)
Collecting gitdb<5,>=4.0.1 (from gitpython!=3.1.29,>=1.0.0->wandb)
  Downloading gitdb-4.0.11-py3-none-any.whl.metadata (1.2 kB)
Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->gitpython!=3.1.29,>=1.0.0->wandb)
  Downloading smmap-5.0.1-py3-none-any.whl.metadata (4.3 kB)
Downloading wandb-0.18.3-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_

In [3]:
import wandb
import os
import re
import math
from tqdm import tqdm
from datetime import datetime
import matplotlib.pyplot as plt
from google.colab import userdata
from huggingface_hub import login


PROJECT_NAME = "lintang-medical-llama3.2-id-a100-batch32-grad4"
HF_USER = "lintangbs"

RUN_NAME =  f"{datetime.now():%Y-%m-%d_%H.%M.%S}"
PROJECT_RUN_NAME = f"{PROJECT_NAME}-{RUN_NAME}"
HUB_MODEL_NAME = f"{HF_USER}/{PROJECT_RUN_NAME}"


STEPS = 50
SAVE_STEPS = 5000
LOG_TO_WANDB = True



In [4]:
wandb.login()

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [5]:
hf_token = userdata.get('HF_TOKEN')
login(hf_token, add_to_git_credential=True)


Token is valid (permission: fineGrained).
Your token has been saved in your configured git credential helpers (store).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [6]:

wandb_api_key = userdata.get('WANDB_API_KEY')
os.environ["WANDB_API_KEY"] = wandb_api_key
wandb.login()


os.environ["WANDB_PROJECT"] = PROJECT_NAME
os.environ["WANDB_LOG_MODEL"] = "checkpoint" if LOG_TO_WANDB else "false"
os.environ["WANDB_WATCH"] = "gradients"

[34m[1mwandb[0m: Currently logged in as: [33mlintangbirdasaputra23[0m ([33mlintangbirdasaputra23-universitas-gadjah-mada-library[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [7]:
if LOG_TO_WANDB:
  run = wandb.init(project=PROJECT_NAME, name=RUN_NAME)

In [8]:
from unsloth import FastLanguageModel
import torch
max_seq_length = 4096 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Llama-3.2-3B-Instruct",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
==((====))==  Unsloth 2024.10.0: Fast Llama patching. Transformers = 4.44.2.
   \\   /|    GPU: NVIDIA A100-SXM4-40GB. Max memory: 39.564 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.4.1+cu121. CUDA = 8.0. CUDA Toolkit = 12.1.
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.28.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/2.24G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/184 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/54.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

In [9]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 4, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 8,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

Unsloth 2024.10.0 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.


In [10]:

from unsloth.chat_templates import get_chat_template

tokenizer = get_chat_template(
    tokenizer,
    chat_template = "llama-3.1",
)



from datasets import load_dataset
dataset = load_dataset("lintangbs/medical-qa-id-good", split = "train")

README.md:   0%|          | 0.00/24.0 [00:00<?, ?B/s]

medical-qa-id-sangat-kecil-good.csv:   0%|          | 0.00/62.2M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/66318 [00:00<?, ? examples/s]

In [11]:
print(dataset.column_names)

['Unnamed: 0', 'system_prompt', 'qas_id', 'question_text', 'orig_answer_texts', 'answer_lengths']


In [12]:
dataset[0]

{'Unnamed: 0': 0,
 'system_prompt': 'Imagine you are a medical knowledgeable expert, share your insights.',
 'qas_id': 'LLFET2',
 'question_text': 'Saya bangun pagi ini dengan perasaan seluruh ruangan berputar ketika saya sedang duduk. Aku pergi ke kamar mandi dengan berjalan terhuyung-huyung, ketika aku mencoba fokus aku merasa mual. Aku coba muntah tapi tak mau keluar.. Setelah minum panadol dan tidur beberapa jam, aku masih merasakan hal yang sama.. Ngomong-ngomong, kalau aku berbaring atau duduk, kepalaku tidak berputar, hanya ketika aku mau bergerak lalu aku merasakan seluruh dunia berputar.. Dan apakah itu merupakan rasa tidak nyaman di perut yang normal pada saat bersamaan? Tadi setelah saya buang air, putarannya berkurang jadi saya tidak yakin apakah itu sambung atau kebetulan.. Terima kasih dok!',
 'orig_answer_texts': 'Hai, Terima kasih telah mengirimkan pertanyaan Anda. Kemungkinan besar penyebab gejala Anda adalah vertigo posisi paroksismal jinak (BPPV), sejenis vertigo per

In [13]:
from unsloth import to_sharegpt
dataset = to_sharegpt(
    dataset,
    merged_prompt = "{question_text}",
    output_column_name = "orig_answer_texts",
    conversation_extension = 3, # Select more to handle longer conversations
)

Merging columns:   0%|          | 0/66318 [00:00<?, ? examples/s]

Converting to ShareGPT:   0%|          | 0/66318 [00:00<?, ? examples/s]

Flattening the indices:   0%|          | 0/66318 [00:00<?, ? examples/s]

Flattening the indices:   0%|          | 0/66318 [00:00<?, ? examples/s]

Flattening the indices:   0%|          | 0/66318 [00:00<?, ? examples/s]

Extending conversations:   0%|          | 0/66318 [00:00<?, ? examples/s]

In [14]:
dataset[0]

{'conversations': [{'from': 'human',
   'value': "('Saya bangun pagi ini dengan perasaan seluruh ruangan berputar ketika saya sedang duduk. Aku pergi ke kamar mandi dengan berjalan terhuyung-huyung, ketika aku mencoba fokus aku merasa mual. Aku coba muntah tapi tak mau keluar.. Setelah minum panadol dan tidur beberapa jam, aku masih merasakan hal yang sama.. Ngomong-ngomong, kalau aku berbaring atau duduk, kepalaku tidak berputar, hanya ketika aku mau bergerak lalu aku merasakan seluruh dunia berputar.. Dan apakah itu merupakan rasa tidak nyaman di perut yang normal pada saat bersamaan? Tadi setelah saya buang air, putarannya berkurang jadi saya tidak yakin apakah itu sambung atau kebetulan.. Terima kasih dok!',)"},
  {'from': 'gpt',
   'value': 'Hai, Terima kasih telah mengirimkan pertanyaan Anda. Kemungkinan besar penyebab gejala Anda adalah vertigo posisi paroksismal jinak (BPPV), sejenis vertigo perifer. Pada kondisi ini, gejala yang paling umum adalah pusing atau pusing, yang dipe

In [15]:
dataset[1]

{'conversations': [{'from': 'human',
   'value': "('Bayi saya buang air besar 5-6 kali sehari selama seminggu. Dalam beberapa hari terakhir jumlahnya meningkat menjadi 7 dan sangat encer dengan potongan-potongan hijau berserabut di dalamnya. Dia tidak tampak sakit, yaitu tidak ada suhu tubuh dan masih makan. Dia sekarang mengalami ruam popok yang sangat parah akibat buang air besar...tolong!',)"},
  {'from': 'gpt',
   'value': 'Hai... Terima kasih telah berkonsultasi di Chat Dokter. Tampaknya anak Anda terkena diare karena virus. Setelah dimulai, diperlukan waktu 5-7 hari untuk pulih sepenuhnya. Kecuali jika anak-anak mengalami keluaran urin yang rendah atau sangat mengantuk atau sangat mengantuk atau mengeluarkan darah atau muntah-muntah berwarna hijau... Anda tidak perlu khawatir. Tidak perlu menggunakan antibiotik kecuali ada darah yang mengalir. Antibiotik dapat memburuk jika digunakan secara tidak perlu sehingga menyebabkan diare terkait antibiotik. Saya sarankan Anda menggunakan 

In [16]:
def remove_parentheses(data):
    curr_conversations = []
    for j in range(len(data["conversations"])):
      if data["conversations"][j]["from"] == "human":
        data["conversations"][j]["value"] = data["conversations"][j]["value"].replace("('", "").replace("',)", "")
      curr_conversations.append(data["conversations"][j])
    data["conversations"] = curr_conversations
    return data


In [17]:
dataset = dataset.map(remove_parentheses)

Map:   0%|          | 0/66318 [00:00<?, ? examples/s]

In [18]:
dataset[5]

{'conversations': [{'from': 'human',
   'value': 'Saya F 38 dalam kondisi yang baik, berolahraga (melakukan triathlon) secara teratur tetapi menderita sakit punggung karena berbagai alasan sepanjang hidup saya. Sekarang saya sering terbangun dengan nyeri punggung bawah dan pinggul yang parah tanpa alasan. hari ini rasa sakitnya hampir membuat nafasku sesak saat aku bergerak. Rasanya nyeri tumpul ketika saya hanya berbaring tetapi saat saya melakukan gerakan apa pun, saya merasakan nyeri yang tajam dan terkadang menjalar ke kaki saya.'},
  {'from': 'gpt',
   'value': 'Hai, Dari riwayat sepertinya Anda mengalami perubahan degeneratif pada tulang punggung bagian bawah yang menyebabkan tekanan pada saraf terjepit. Mungkin ada osteomalacia atau osteoporosis juga. Lakukan rontgen daerah lumbosakral untuk osteoartritis. Fisioterapi seperti latihan ekstensi punggung akan sangat membantu. Ambil suntikan atau obat B1, B6, B!2. Konsumsi suplemen kalsium, vitamin A, dan D. Oke dan berhati-hatilah.

In [19]:
from unsloth import standardize_sharegpt
dataset = standardize_sharegpt(dataset)

Standardizing format:   0%|          | 0/66318 [00:00<?, ? examples/s]

In [20]:
dataset[0]

{'conversations': [{'content': 'Saya bangun pagi ini dengan perasaan seluruh ruangan berputar ketika saya sedang duduk. Aku pergi ke kamar mandi dengan berjalan terhuyung-huyung, ketika aku mencoba fokus aku merasa mual. Aku coba muntah tapi tak mau keluar.. Setelah minum panadol dan tidur beberapa jam, aku masih merasakan hal yang sama.. Ngomong-ngomong, kalau aku berbaring atau duduk, kepalaku tidak berputar, hanya ketika aku mau bergerak lalu aku merasakan seluruh dunia berputar.. Dan apakah itu merupakan rasa tidak nyaman di perut yang normal pada saat bersamaan? Tadi setelah saya buang air, putarannya berkurang jadi saya tidak yakin apakah itu sambung atau kebetulan.. Terima kasih dok!',
   'role': 'user'},
  {'content': 'Hai, Terima kasih telah mengirimkan pertanyaan Anda. Kemungkinan besar penyebab gejala Anda adalah vertigo posisi paroksismal jinak (BPPV), sejenis vertigo perifer. Pada kondisi ini, gejala yang paling umum adalah pusing atau pusing, yang diperburuk dengan adanya

In [21]:
def formatting_prompts_func(examples):
    convos = examples["conversations"]
    texts = [tokenizer.apply_chat_template(convo, tokenize = False, add_generation_prompt = False) for convo in convos]
    return { "text" : texts, }

In [22]:
dataset = dataset.map(formatting_prompts_func, batched = True,)

Map:   0%|          | 0/66318 [00:00<?, ? examples/s]

In [23]:
dataset[5]

{'conversations': [{'content': 'Saya F 38 dalam kondisi yang baik, berolahraga (melakukan triathlon) secara teratur tetapi menderita sakit punggung karena berbagai alasan sepanjang hidup saya. Sekarang saya sering terbangun dengan nyeri punggung bawah dan pinggul yang parah tanpa alasan. hari ini rasa sakitnya hampir membuat nafasku sesak saat aku bergerak. Rasanya nyeri tumpul ketika saya hanya berbaring tetapi saat saya melakukan gerakan apa pun, saya merasakan nyeri yang tajam dan terkadang menjalar ke kaki saya.',
   'role': 'user'},
  {'content': 'Hai, Dari riwayat sepertinya Anda mengalami perubahan degeneratif pada tulang punggung bagian bawah yang menyebabkan tekanan pada saraf terjepit. Mungkin ada osteomalacia atau osteoporosis juga. Lakukan rontgen daerah lumbosakral untuk osteoartritis. Fisioterapi seperti latihan ekstensi punggung akan sangat membantu. Ambil suntikan atau obat B1, B6, B!2. Konsumsi suplemen kalsium, vitamin A, dan D. Oke dan berhati-hatilah.',
   'role': '

In [24]:
dataset = dataset.train_test_split(test_size=0.1, shuffle=True)


In [25]:
dataset

DatasetDict({
    train: Dataset({
        features: ['conversations', 'text'],
        num_rows: 59686
    })
    test: Dataset({
        features: ['conversations', 'text'],
        num_rows: 6632
    })
})

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments, DataCollatorForSeq2Seq

from unsloth import is_bfloat16_supported
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset["train"],
    eval_dataset = dataset["test"],
    dataset_text_field = "text",
    data_collator = DataCollatorForSeq2Seq(tokenizer = tokenizer),
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 8, # 2 -> 114 jam  utk 3 epoch , batch_size 16 gak cukup gpu vramnya
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        num_train_epochs = 1,
        learning_rate = 1e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",

        report_to="wandb" if LOG_TO_WANDB else None,
        run_name=RUN_NAME,
        save_strategy="steps",
        hub_strategy="every_save",
        push_to_hub=True,
        hub_model_id=HUB_MODEL_NAME,
        hub_private_repo=True,
        save_steps=SAVE_STEPS,
    ),
)

Map (num_proc=2):   0%|          | 0/59686 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/6632 [00:00<?, ? examples/s]

In [None]:
from unsloth.chat_templates import train_on_responses_only
trainer = train_on_responses_only(
    trainer,
    instruction_part = "<|start_header_id|>user<|end_header_id|>\n\n",
    response_part = "<|start_header_id|>assistant<|end_header_id|>\n\n",
)

Map:   0%|          | 0/59686 [00:00<?, ? examples/s]

Map:   0%|          | 0/6632 [00:00<?, ? examples/s]

In [None]:
tokenizer.decode(trainer.train_dataset[5]["input_ids"])

"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nCutting Knowledge Date: December 2023\nToday Date: 26 July 2024\n\n<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n('Apa pengobatan pilihan yang direkomendasikan untuk penghentian opioid akut?',)<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nPengobatan pilihan yang direkomendasikan untuk penghentian opioid akut adalah metadon dosis rendah dan obat tambahan untuk mengatasi gejala.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n('Seorang laki-laki berusia 42 tahun datang ke dokter dengan keluhan sariawan yang nyeri sejak 1 minggu. Dia menderita penyakit maag serupa selama setahun terakhir. Setiap episode berlangsung sekitar satu minggu dan sembuh tanpa meninggalkan bekas luka. Ia juga pernah mengalami tukak serupa di skrotumnya, namun tukak tersebut meninggalkan bekas. Dia tidak meminum obat. Suhu tubuhnya 36,8°C (98,2°F), dan tanda-tanda vital lainnya stabil. Pada pemeriksaan fisik, terlihat ulkus ke

In [33]:

gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

GPU = NVIDIA A100-SXM4-40GB. Max memory = 39.564 GB.
27.041 GB of memory reserved.


In [None]:
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 59,686 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 8 | Gradient Accumulation steps = 4
\        /    Total batch size = 32 | Total steps = 1,865
 "-____-"     Number of trainable parameters = 6,078,464


Step,Training Loss
1,1.9161
2,2.0077
3,2.0317
4,1.9943
5,2.0279
6,2.0074
7,1.9755
8,2.0172
9,1.8646
10,1.9426


[34m[1mwandb[0m: Adding directory to artifact (./outputs/checkpoint-1865)... Done. 0.1s


In [None]:
used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
used_percentage = round(used_memory         /max_memory*100, 3)
lora_percentage = round(used_memory_for_lora/max_memory*100, 3)
print(f"{trainer_stats.metrics['train_runtime']} seconds used for training.")
print(f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.")
print(f"Peak reserved memory = {used_memory} GB.")
print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
print(f"Peak reserved memory % of max memory = {used_percentage} %.")
print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")

12480.0784 seconds used for training.
208.0 minutes used for training.
Peak reserved memory = 37.971 GB.
Peak reserved memory for training = 0.0 GB.
Peak reserved memory % of max memory = 95.974 %.
Peak reserved memory for training % of max memory = 0.0 %.


In [None]:
model.push_to_hub(PROJECT_RUN_NAME, private=False)
tokenizer.push_to_hub(PROJECT_RUN_NAME, private=False)
print(f"Saved to the hub: {PROJECT_RUN_NAME}")

README.md:   0%|          | 0.00/1.90k [00:00<?, ?B/s]

No files have been modified since last commit. Skipping to prevent empty commit.


Saved model to https://huggingface.co/lintang-medical-llama3.2-id-a100-batch32-grad4-2024-10-15_12.47.17


No files have been modified since last commit. Skipping to prevent empty commit.


Saved to the hub: lintang-medical-llama3.2-id-a100-batch32-grad4-2024-10-15_12.47.17


In [None]:
model.save_pretrained("lora_model") # Local saving
tokenizer.save_pretrained("lora_model")

('lora_model/tokenizer_config.json',
 'lora_model/special_tokens_map.json',
 'lora_model/tokenizer.json')

In [None]:
!zip -r outputs_a100.zip outputs

  adding: outputs/ (stored 0%)
  adding: outputs/tokenizer.json (deflated 74%)
  adding: outputs/README.md (deflated 55%)
  adding: outputs/checkpoint-1865/ (stored 0%)
  adding: outputs/checkpoint-1865/tokenizer.json (deflated 74%)
  adding: outputs/checkpoint-1865/scheduler.pt (deflated 56%)
  adding: outputs/checkpoint-1865/README.md (deflated 66%)
  adding: outputs/checkpoint-1865/rng_state.pth (deflated 25%)
  adding: outputs/checkpoint-1865/tokenizer_config.json (deflated 94%)
  adding: outputs/checkpoint-1865/adapter_config.json (deflated 54%)
  adding: outputs/checkpoint-1865/trainer_state.json (deflated 82%)
  adding: outputs/checkpoint-1865/special_tokens_map.json (deflated 71%)
  adding: outputs/checkpoint-1865/optimizer.pt (deflated 11%)
  adding: outputs/checkpoint-1865/training_args.bin (deflated 51%)
  adding: outputs/checkpoint-1865/adapter_model.safetensors (deflated 7%)
  adding: outputs/tokenizer_config.json (deflated 94%)
  adding: outputs/adapter_config.json (defla

Epoch 2


In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments, DataCollatorForSeq2Seq

from unsloth import is_bfloat16_supported
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset["train"],
    eval_dataset = dataset["test"],
    dataset_text_field = "text",
    data_collator = DataCollatorForSeq2Seq(tokenizer = tokenizer),
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 8, # 2 -> 114 jam  utk 3 epoch , batch_size 16 gak cukup gpu vramnya
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        num_train_epochs = 2,
        learning_rate = 1e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",

        report_to="wandb" if LOG_TO_WANDB else None,
        run_name=RUN_NAME,
        save_strategy="steps",
        hub_strategy="every_save",
        push_to_hub=True,
        hub_model_id=HUB_MODEL_NAME,
        hub_private_repo=True,
        save_steps=SAVE_STEPS,

        evaluation_strategy="steps",
        eval_steps=500,
        do_eval=True,
        per_device_eval_batch_size = 4,
        eval_accumulation_steps = 4
    ),
)



In [None]:
from unsloth.chat_templates import train_on_responses_only
trainer = train_on_responses_only(
    trainer,
    instruction_part = "<|start_header_id|>user<|end_header_id|>\n\n",
    response_part = "<|start_header_id|>assistant<|end_header_id|>\n\n",
)

In [None]:
run = wandb.init()
artifact = run.use_artifact('lintang-medical-llama3.2-id-a100-batch32-grad4/model-2024-10-15_12.47.17:v1', type='model')
artifact_dir = artifact.download()



VBox(children=(Label(value='31.996 MB of 31.996 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
train/epoch,▁
train/global_step,▁

0,1
total_flos,1.567267733419475e+18
train/epoch,0.99987
train/global_step,1865.0
train_loss,0.0
train_runtime,0.2887
train_samples_per_second,206774.298
train_steps_per_second,6461.047


[34m[1mwandb[0m: \ 1 of 11 files downloaded...[34m[1mwandb[0m:   11 of 11 files downloaded.  


In [None]:
trainer.train(resume_from_checkpoint=artifact_dir)

  torch.load(os.path.join(checkpoint, OPTIMIZER_NAME), map_location=map_location)
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 59,686 | Num Epochs = 2
O^O/ \_/ \    Batch size per device = 8 | Gradient Accumulation steps = 4
\        /    Total batch size = 32 | Total steps = 3,730
 "-____-"     Number of trainable parameters = 6,078,464
  checkpoint_rng_state = torch.load(rng_file)


Step,Training Loss,Validation Loss
2000,1.198,1.245411
2500,1.1937,1.222824
3000,1.1922,1.206397
3500,1.2108,1.196808


[34m[1mwandb[0m: Adding directory to artifact (./outputs/checkpoint-3730)... Done. 0.1s


TrainOutput(global_step=3730, training_loss=0.6022312416308048, metrics={'train_runtime': 14334.4708, 'train_samples_per_second': 8.328, 'train_steps_per_second': 0.26, 'total_flos': 3.13267941319621e+18, 'train_loss': 0.6022312416308048, 'epoch': 1.9998659697091543})

In [None]:
used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
used_percentage = round(used_memory         /max_memory*100, 3)
lora_percentage = round(used_memory_for_lora/max_memory*100, 3)
print(f"{trainer_stats.metrics['train_runtime']} seconds used for training.")
print(f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.")
print(f"Peak reserved memory = {used_memory} GB.")
print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
print(f"Peak reserved memory % of max memory = {used_percentage} %.")
print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")

12480.0784 seconds used for training.
208.0 minutes used for training.
Peak reserved memory = 37.971 GB.
Peak reserved memory for training = 0.0 GB.
Peak reserved memory % of max memory = 95.974 %.
Peak reserved memory for training % of max memory = 0.0 %.


In [None]:
model.push_to_hub(PROJECT_RUN_NAME, private=False)
tokenizer.push_to_hub(PROJECT_RUN_NAME, private=False)
print(f"Saved to the hub: {PROJECT_RUN_NAME}")

README.md:   0%|          | 0.00/2.66k [00:00<?, ?B/s]

No files have been modified since last commit. Skipping to prevent empty commit.


Saved model to https://huggingface.co/lintang-medical-llama3.2-id-a100-batch32-grad4-2024-10-15_12.47.17


No files have been modified since last commit. Skipping to prevent empty commit.


Saved to the hub: lintang-medical-llama3.2-id-a100-batch32-grad4-2024-10-15_12.47.17


In [None]:
model.save_pretrained("lora_model") # Local saving
tokenizer.save_pretrained("lora_model")

('lora_model/tokenizer_config.json',
 'lora_model/special_tokens_map.json',
 'lora_model/tokenizer.json')

# Epoch 3

In [30]:
artifact = run.use_artifact('uncategorized/model-rosy-star-9:v1', type='model')
artifact_dir = artifact.download()


[34m[1mwandb[0m:   11 of 11 files downloaded.  


In [27]:
from trl import SFTTrainer
from transformers import TrainingArguments, DataCollatorForSeq2Seq

from unsloth import is_bfloat16_supported
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset["train"],
    eval_dataset = dataset["test"],
    dataset_text_field = "text",
    data_collator = DataCollatorForSeq2Seq(tokenizer = tokenizer),
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 8, # 2 -> 114 jam  utk 3 epoch , batch_size 16 gak cukup gpu vramnya
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        num_train_epochs = 3,
        learning_rate = 1e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",

        report_to="wandb" if LOG_TO_WANDB else None,
        run_name=RUN_NAME,
        save_strategy="steps",
        hub_strategy="every_save",
        push_to_hub=True,
        hub_model_id=HUB_MODEL_NAME,
        hub_private_repo=True,
        save_steps=SAVE_STEPS,

        evaluation_strategy="steps",
        eval_steps=500,
        do_eval=True,
        per_device_eval_batch_size = 4,
        eval_accumulation_steps = 4
    ),
)



Map (num_proc=2):   0%|          | 0/59686 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/6632 [00:00<?, ? examples/s]

In [28]:
from unsloth.chat_templates import train_on_responses_only
trainer = train_on_responses_only(
    trainer,
    instruction_part = "<|start_header_id|>user<|end_header_id|>\n\n",
    response_part = "<|start_header_id|>assistant<|end_header_id|>\n\n",
)

Map:   0%|          | 0/59686 [00:00<?, ? examples/s]

Map:   0%|          | 0/6632 [00:00<?, ? examples/s]

In [31]:
trainer.train(resume_from_checkpoint=artifact_dir)

  torch.load(os.path.join(checkpoint, OPTIMIZER_NAME), map_location=map_location)
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 59,686 | Num Epochs = 3
O^O/ \_/ \    Batch size per device = 8 | Gradient Accumulation steps = 4
\        /    Total batch size = 32 | Total steps = 5,595
 "-____-"     Number of trainable parameters = 6,078,464
  checkpoint_rng_state = torch.load(rng_file)


Step,Training Loss,Validation Loss
4000,1.2004,1.195957
4500,1.1661,1.184279
5000,1.2136,1.17599
5500,1.2032,1.171589


[34m[1mwandb[0m: Adding directory to artifact (./outputs/checkpoint-5000)... Done. 0.1s
[34m[1mwandb[0m: Adding directory to artifact (./outputs/checkpoint-5595)... Done. 0.1s


TrainOutput(global_step=5595, training_loss=0.3884364219091107, metrics={'train_runtime': 14035.0413, 'train_samples_per_second': 12.758, 'train_steps_per_second': 0.399, 'total_flos': 4.692433165361431e+18, 'train_loss': 0.3884364219091107, 'epoch': 2.9998659697091545})

In [35]:
# used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
# used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
# used_percentage = round(used_memory         /max_memory*100, 3)
# lora_percentage = round(used_memory_for_lora/max_memory*100, 3)
# print(f"{trainer_stats.metrics['train_runtime']} seconds used for training.")
# print(f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.")
# print(f"Peak reserved memory = {used_memory} GB.")
# print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
# print(f"Peak reserved memoryassert run is wandb.run% of max memory = {used_percentage} %.")
# print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")

In [36]:
model.push_to_hub(PROJECT_RUN_NAME, private=False)
tokenizer.push_to_hub(PROJECT_RUN_NAME, private=False)
print(f"Saved to the hub: {PROJECT_RUN_NAME}")

README.md:   0%|          | 0.00/1.63k [00:00<?, ?B/s]

No files have been modified since last commit. Skipping to prevent empty commit.


Saved model to https://huggingface.co/lintang-medical-llama3.2-id-a100-batch32-grad4-2024-10-16_00.03.31


No files have been modified since last commit. Skipping to prevent empty commit.


Saved to the hub: lintang-medical-llama3.2-id-a100-batch32-grad4-2024-10-16_00.03.31


In [37]:
model.save_pretrained("lora_model") # Local saving
tokenizer.save_pretrained("lora_model")

('lora_model/tokenizer_config.json',
 'lora_model/special_tokens_map.json',
 'lora_model/tokenizer.json')

# Epoch 4
TODO

# Eval
TODO