In [None]:
!pip install -q transformers datasets accelerate evaluate peft huggingface_hub bitsandbytes

##Train MedMCQA Dataset using `Llama-3.2-1B`

###**1. Load Data**

In [None]:
from datasets import load_dataset
from datasets import DatasetDict

raw_dataset = {
    "train": load_dataset("openlifescienceai/medmcqa", split="train"),
    "valid": load_dataset("openlifescienceai/medmcqa", split="validation"),
}
raw_dataset = DatasetDict(raw_dataset)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/85.9M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/936k [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/1.48M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/182822 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/6150 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/4183 [00:00<?, ? examples/s]

In [None]:
raw_dataset

DatasetDict({
    train: Dataset({
        features: ['id', 'question', 'opa', 'opb', 'opc', 'opd', 'cop', 'choice_type', 'exp', 'subject_name', 'topic_name'],
        num_rows: 182822
    })
    valid: Dataset({
        features: ['id', 'question', 'opa', 'opb', 'opc', 'opd', 'cop', 'choice_type', 'exp', 'subject_name', 'topic_name'],
        num_rows: 4183
    })
})

###**2. Dataloader**

In [None]:
import torch

id2label = {0: "A", 1: "B", 2: "C", 3: "D"}


def preprocess_function(examples, max_seq_length, tokenizer):
    sentences = []
    labels = []
    for example in zip(
        examples["question"],
        examples["exp"],
        examples["opa"],
        examples["opb"],
        examples["opc"],
        examples["opd"],
        examples["cop"],
    ):
        question = example[0]
        context = example[1]
        opa = example[2]
        opb = example[3]
        opc = example[4]
        opd = example[5]
        choices = f"A{opa}. \n B. {opb}. \n C. {opc} \n D. {opd}"
        prompt = f"Context: {context}. Question: {question}. There are four answers as follows: {choices}. Please answer with only the letter (A, B, C, or D) corresponding to the correct choice. The answer is:"
        sentences.append(prompt)

        answer = id2label[int(example[6])]
        labels.append(answer)

    model_inputs = tokenizer(
        sentences, padding="max_length", max_length=max_seq_length, truncation=True
    )

    label_encodings = tokenizer(
        labels,
        padding="max_length",
        max_length=max_seq_length,
        truncation=True,
        add_special_tokens=True,
    )

    labels = label_encodings["input_ids"].copy()
    for i in range(len(labels)):
        labels[i] = [
            label if label != tokenizer.pad_token_id else -100 for label in labels[i]
        ]
    model_inputs["labels"] = labels
    return model_inputs

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import (
    get_peft_model,
    LoraConfig,
    TaskType,
    PeftModel,
    prepare_model_for_kbit_training,
)
from huggingface_hub import login
import torch

login(token="your-key")  # Request Accept

model_name = "meta-llama/Llama-3.2-1B"
tokenizer = AutoTokenizer.from_pretrained(model_name)

tokenizer_config.json:   0%|          | 0.00/50.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/301 [00:00<?, ?B/s]

In [None]:
tokenizer.pad_token_id = 128001
print(tokenizer.pad_token)

<|end_of_text|>


In [None]:
def keep_if_short(example, max_token_length=100):
    """Kiểm tra xem độ dài token của prompt có vượt quá giới hạn không."""
    context = example["exp"]
    question = example["question"]
    opa = example["opa"]
    opb = example["opb"]
    opc = example["opc"]
    opd = example["opd"]

    # Tạo các lựa chọn
    choices = f"A. {opa}\nB. {opb}\nC. {opc}\nD. {opd}"

    # Tạo prompt
    prompt = f"Context: {context}. Question: {question}. There are four answers as follows: {choices}. Please answer with only the letter (A, B, C, or D) corresponding to the correct choice. The answer is:"

    # Tokenize prompt và kiểm tra số lượng token
    input_ids = tokenizer(prompt, truncation=False)["input_ids"]
    return len(input_ids) <= max_token_length


filtered_datasets = {}
max_samples_train = 20000
max_samples_valid = 1000
print("Bắt đầu lọc dataset...")

for split, data in raw_dataset.items():
    print(f"\nĐang xử lý split: {split}")
    initial_count = len(data)
    print(f"Số lượng mẫu ban đầu: {initial_count}")

    filtered_data = data.filter(keep_if_short, fn_kwargs={"max_token_length": 100})

    filtered_count = len(filtered_data)
    print(f"Số lượng mẫu sau khi lọc (<= 100 tokens): {filtered_count}")
    print(f"Đã loại bỏ {initial_count - filtered_count} mẫu do quá dài.")

    if split == "train":
        final_data = filtered_data.select(range(min(filtered_count, max_samples_train)))
        print(
            f"Chọn tối đa {max_samples_train} mẫu cho split '{split}'. Số lượng cuối cùng: {len(final_data)}"
        )
    elif split == "valid":
        final_data = filtered_data.select(range(min(filtered_count, max_samples_valid)))
        print(
            f"Chọn tối đa {max_samples_valid} mẫu cho split '{split}'. Số lượng cuối cùng: {len(final_data)}"
        )
    else:
        final_data = filtered_data

    filtered_datasets[split] = final_data

new_dataset = DatasetDict(filtered_datasets)

Bắt đầu lọc dataset...

Đang xử lý split: train
Số lượng mẫu ban đầu: 182822


Filter:   0%|          | 0/182822 [00:00<?, ? examples/s]

Số lượng mẫu sau khi lọc (<= 100 tokens): 38619
Đã loại bỏ 144203 mẫu do quá dài.
Chọn tối đa 20000 mẫu cho split 'train'. Số lượng cuối cùng: 20000

Đang xử lý split: valid
Số lượng mẫu ban đầu: 4183


Filter:   0%|          | 0/4183 [00:00<?, ? examples/s]

Số lượng mẫu sau khi lọc (<= 100 tokens): 1777
Đã loại bỏ 2406 mẫu do quá dài.
Chọn tối đa 1000 mẫu cho split 'valid'. Số lượng cuối cùng: 1000


In [None]:
new_dataset

DatasetDict({
    train: Dataset({
        features: ['id', 'question', 'opa', 'opb', 'opc', 'opd', 'cop', 'choice_type', 'exp', 'subject_name', 'topic_name'],
        num_rows: 20000
    })
    valid: Dataset({
        features: ['id', 'question', 'opa', 'opb', 'opc', 'opd', 'cop', 'choice_type', 'exp', 'subject_name', 'topic_name'],
        num_rows: 1000
    })
})

In [None]:
# model = AutoModelForCausalLM.from_pretrained(
#     model_name,
#     torch_dtype=torch.bfloat16
# )

quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quant_config,
)

peft_config = LoraConfig(
    r=32,
    lora_alpha=64,
    lora_dropout=0.05,
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    bias="none",
    task_type="CAUSAL_LM",
)

model.config.use_cache = False

model = prepare_model_for_kbit_training(
    model, gradient_checkpointing_kwargs={"use_reentrant": False}
)


# model = PeftModel.from_pretrained(model, "wanhin/Llama-3.2-1B-MedMCQA-lora")
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

config.json:   0%|          | 0.00/843 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.47G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/185 [00:00<?, ?B/s]

trainable params: 22,544,384 || all params: 1,258,358,784 || trainable%: 1.7916


In [None]:
# for name, param in model.named_parameters():
#     if "lora" in name:
#         param.requires_grad = True
#     else:
#         param.requires_grad = False

# # Set to training mode
# model.train()

# # Verify the trainable parameters
# model.print_trainable_parameters()

In [None]:
from functools import partial

processed_dataset = new_dataset.map(
    partial(preprocess_function, max_seq_length=128, tokenizer=tokenizer),
    batched=True,
    load_from_cache_file=False,
    remove_columns=[
        "question",
        "exp",
        "cop",
        "opa",
        "opb",
        "opc",
        "opd",
        "subject_name",
        "topic_name",
        "id",
        "choice_type",
    ],
    desc="Running tokenizer on dataset",
)

Running tokenizer on dataset:   0%|          | 0/20000 [00:00<?, ? examples/s]

Running tokenizer on dataset:   0%|          | 0/1000 [00:00<?, ? examples/s]

In [None]:
processed_dataset

DatasetDict({
    train: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 20000
    })
    valid: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 1000
    })
})

In [None]:
processed_dataset["train"]["labels"][:1]

[[128000,
  34,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,


In [None]:
processed_dataset["valid"]["labels"][:1]

[[128000,
  33,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,
  -100,


###**3. Metric**

In [None]:
import numpy as np
import evaluate
from transformers import EvalPrediction

label2id = {"A": "0", "B": "1", "C": "2", "D": "3"}


def postprocess_text(predictions, labels):
    predictions = [prediction.strip() for prediction in predictions]
    labels = [label2id[label.strip()] for label in labels]

    for idx in range(len(predictions)):
        if predictions[idx] in label2id:
            predictions[idx] = label2id[predictions[idx]]
        else:
            predictions[idx] = "-100"
    return predictions, labels


def load_metric(metric_name):
    if metric_name == "accuracy":
        return evaluate.load("accuracy")
    elif metric_name == "f1":
        return evaluate.load("f1")


def seq2seq_compute_metrics(tokenizer, metric):
    def compute_metrics(eval_pred: EvalPrediction):
        nonlocal tokenizer, metric
        predictions, labels = eval_pred
        if isinstance(predictions, tuple):
            predictions = predictions[0]

        original_padding_side = tokenizer.padding_side

        tokenizer.padding_side = "left"
        predictions = np.where(predictions != -100, predictions, tokenizer.pad_token_id)
        decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)

        tokenizer.padding_side = "right"
        label_ids = np.where(labels != -100, labels, tokenizer.pad_token_id)
        decoded_labels = tokenizer.batch_decode(label_ids, skip_special_tokens=True)

        tokenizer.padding_side = original_padding_side

        print("Decoded predictions:", decoded_preds[:3])
        print("Decoded labels:", decoded_labels[:3])

        extracted_preds = []
        for pred in decoded_preds:
            before, sep, after = pred.partition("The answer is:")
            if sep:
                after = after.lstrip()
                token = after[0] if after else "-100"
                extracted_preds.append(token)
            else:
                extracted_preds.append("-100")

        print("Extracted predictions:", extracted_preds[:3])

        processed_preds, processed_labels = postprocess_text(
            extracted_preds, decoded_labels
        )

        print("Post-processed predictions:", processed_preds[:3])
        print("Post-processed labels:", processed_labels[:3])

        result = metric.compute(
            predictions=processed_preds, references=processed_labels
        )
        return result

    return compute_metrics

In [None]:
metric = load_metric("accuracy")
compute_metrics = seq2seq_compute_metrics(tokenizer, metric)

Downloading builder script:   0%|          | 0.00/4.20k [00:00<?, ?B/s]

###**4.Trainer**

In [None]:
from transformers import (
    DataCollatorForSeq2Seq,
    Seq2SeqTrainer,
    Seq2SeqTrainingArguments,
)

In [None]:
model.config.pad_token_id = 128001
print("Pad Token ID:", model.config.pad_token_id)

Pad Token ID: 128001


In [None]:
label_pad_token_id = -100

data_collator = DataCollatorForSeq2Seq(
    tokenizer, model=model, label_pad_token_id=label_pad_token_id, pad_to_multiple_of=8
)

In [None]:
training_args = Seq2SeqTrainingArguments(
    output_dir=f"./model/{model_name}",
    num_train_epochs=1,
    per_device_train_batch_size=32,
    gradient_accumulation_steps=2,
    per_device_eval_batch_size=32,
    eval_strategy="steps",
    save_strategy="steps",
    warmup_steps=10,
    learning_rate=5e-4,
    weight_decay=0.01,
    save_steps=156,
    eval_steps=52,
    save_total_limit=2,
    bf16=True,
    predict_with_generate=True,
    load_best_model_at_end=True,
    logging_dir="./logs",
    logging_steps=52,
    metric_for_best_model="accuracy",
    label_names=["labels"],
    report_to=[],
)

trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    train_dataset=processed_dataset["train"],
    eval_dataset=processed_dataset["valid"],
)

In [None]:
trainer.train()

Step,Training Loss,Validation Loss,Accuracy
52,0.8883,1.516311,0.004
104,0.7015,1.380453,0.287
156,0.7008,1.368542,0.272
208,0.7003,1.410622,0.267
260,0.6965,1.387401,0.263
312,0.6931,1.373996,0.261


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
Setting `pad_tok

Decoded predictions: ['Context: None. Question: 27.\tThe lateral spread of dental caries is facilitated mostly  by the. There are four answers as follows: AEnamel spindles. \n B. Dentinoenamel junction. \n C. Enamel lamellae \n D. Striae of Retzius. Please answer with only the letter (A, B, C, or D) corresponding to the correct choice. The answer is:amp. 1. 2. 3. 4. 5. 6.', 'Context: None. Question: Heavy forces on periodontal ligament causes:. There are four answers as follows: AHyalinization. \n B. Osteoclastic activity around tooth. \n C. Osteoblastic activity around tooth \n D. Crest bone resorption. Please answer with only the letter (A, B, C, or D) corresponding to the correct choice. The answer is:amp.', 'Context: None. Question: The source of calcium ions of a calcific bridge in a\ntooth in which calcium hydroxide pulpotomy has been\nperformed is:. There are four answers as follows: ABlood vessel borne. \n B. Derided cells. \n C. Dentin \n D. From the calcium hydroxide. Please 

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
Setting `pad_tok

Decoded predictions: ['Context: None. Question: 27.\tThe lateral spread of dental caries is facilitated mostly  by the. There are four answers as follows: AEnamel spindles. \n B. Dentinoenamel junction. \n C. Enamel lamellae \n D. Striae of Retzius. Please answer with only the letter (A, B, C, or D) corresponding to the correct choice. The answer is:B.', 'Context: None. Question: Heavy forces on periodontal ligament causes:. There are four answers as follows: AHyalinization. \n B. Osteoclastic activity around tooth. \n C. Osteoblastic activity around tooth \n D. Crest bone resorption. Please answer with only the letter (A, B, C, or D) corresponding to the correct choice. The answer is:B.\nB. OsteBBB B B BB B BB BB BB', 'Context: None. Question: The source of calcium ions of a calcific bridge in a\ntooth in which calcium hydroxide pulpotomy has been\nperformed is:. There are four answers as follows: ABlood vessel borne. \n B. Derided cells. \n C. Dentin \n D. From the calcium hydroxide.

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
Setting `pad_tok

Decoded predictions: ['Context: None. Question: 27.\tThe lateral spread of dental caries is facilitated mostly  by the. There are four answers as follows: AEnamel spindles. \n B. Dentinoenamel junction. \n C. Enamel lamellae \n D. Striae of Retzius. Please answer with only the letter (A, B, C, or D) corresponding to the correct choice. The answer is:B. EnCaine LamCae.', 'Context: None. Question: Heavy forces on periodontal ligament causes:. There are four answers as follows: AHyalinization. \n B. Osteoclastic activity around tooth. \n C. Osteoblastic activity around tooth \n D. Crest bone resorption. Please answer with only the letter (A, B, C, or D) corresponding to the correct choice. The answer is:A. B. C. D.\nDavB. B. B. B. B.', 'Context: None. Question: The source of calcium ions of a calcific bridge in a\ntooth in which calcium hydroxide pulpotomy has been\nperformed is:. There are four answers as follows: ABlood vessel borne. \n B. Derided cells. \n C. Dentin \n D. From the calc

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
Setting `pad_tok

Decoded predictions: ['Context: None. Question: 27.\tThe lateral spread of dental caries is facilitated mostly  by the. There are four answers as follows: AEnamel spindles. \n B. Dentinoenamel junction. \n C. Enamel lamellae \n D. Striae of Retzius. Please answer with only the letter (A, B, C, or D) corresponding to the correct choice. The answer is:B.', 'Context: None. Question: Heavy forces on periodontal ligament causes:. There are four answers as follows: AHyalinization. \n B. Osteoclastic activity around tooth. \n C. Osteoblastic activity around tooth \n D. Crest bone resorption. Please answer with only the letter (A, B, C, or D) corresponding to the correct choice. The answer is:B. OsteBC. ', 'Context: None. Question: The source of calcium ions of a calcific bridge in a\ntooth in which calcium hydroxide pulpotomy has been\nperformed is:. There are four answers as follows: ABlood vessel borne. \n B. Derided cells. \n C. Dentin \n D. From the calcium hydroxide. Please answer with o

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
Setting `pad_tok

Decoded predictions: ['Context: None. Question: 27.\tThe lateral spread of dental caries is facilitated mostly  by the. There are four answers as follows: AEnamel spindles. \n B. Dentinoenamel junction. \n C. Enamel lamellae \n D. Striae of Retzius. Please answer with only the letter (A, B, C, or D) corresponding to the correct choice. The answer is:B. D. 27. The lateral spread of dental carBBBBBB B', 'Context: None. Question: Heavy forces on periodontal ligament causes:. There are four answers as follows: AHyalinization. \n B. Osteoclastic activity around tooth. \n C. Osteoblastic activity around tooth \n D. Crest bone resorption. Please answer with only the letter (A, B, C, or D) corresponding to the correct choice. The answer is:A. OsteB. OsteC. OsteD. ', 'Context: None. Question: The source of calcium ions of a calcific bridge in a\ntooth in which calcium hydroxide pulpotomy has been\nperformed is:. There are four answers as follows: ABlood vessel borne. \n B. Derided cells. \n C. 

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
Setting `pad_tok

Decoded predictions: ['Context: None. Question: 27.\tThe lateral spread of dental caries is facilitated mostly  by the. There are four answers as follows: AEnamel spindles. \n B. Dentinoenamel junction. \n C. Enamel lamellae \n D. Striae of Retzius. Please answer with only the letter (A, B, C, or D) corresponding to the correct choice. The answer is:B. ', 'Context: None. Question: Heavy forces on periodontal ligament causes:. There are four answers as follows: AHyalinization. \n B. Osteoclastic activity around tooth. \n C. Osteoblastic activity around tooth \n D. Crest bone resorption. Please answer with only the letter (A, B, C, or D) corresponding to the correct choice. The answer is:B. Osteoclastic activity around tooth.\nC. OsteBBB\nD.', 'Context: None. Question: The source of calcium ions of a calcific bridge in a\ntooth in which calcium hydroxide pulpotomy has been\nperformed is:. There are four answers as follows: ABlood vessel borne. \n B. Derided cells. \n C. Dentin \n D. From

TrainOutput(global_step=312, training_loss=0.7301059991885455, metrics={'train_runtime': 8557.5557, 'train_samples_per_second': 2.337, 'train_steps_per_second': 0.036, 'total_flos': 1.5269335928930304e+16, 'train_loss': 0.7301059991885455, 'epoch': 0.9984})

##Predict ViMedMCQA

In [None]:
model.push_to_hub("your_huggingface/Llama-3.2-1B-MedMCQA-lora-4bit")
tokenizer.push_to_hub("your_huggingface/Llama-3.2-1B-MedMCQA-lora-4bit")

adapter_model.safetensors:   0%|          | 0.00/90.2M [00:00<?, ?B/s]

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/wanhin/Llama-3.2-1B-MedMCQA-lora-4bit/commit/9dd4081fa2c18461692b2e4d67f67a4e9f3e14ed', commit_message='Upload tokenizer', commit_description='', oid='9dd4081fa2c18461692b2e4d67f67a4e9f3e14ed', pr_url=None, repo_url=RepoUrl('https://huggingface.co/wanhin/Llama-3.2-1B-MedMCQA-lora-4bit', endpoint='https://huggingface.co', repo_type='model', repo_id='wanhin/Llama-3.2-1B-MedMCQA-lora-4bit'), pr_revision=None, pr_num=None)