## **Install and import necessary libraries**

In [None]:
!pip install -q -U bitsandbytes
!pip install -q -U datasets
!pip install -q -U git+https://github.com/huggingface/transformers.git
!pip install -q -U git+https://github.com/huggingface/peft.git
!pip install -q -U git+https://github.com/huggingface/accelerate.git
!pip install -q -U loralib
!pip install -q -U einops

In [None]:
import os
import torch
from datasets import load_dataset, Dataset
from transformers import (
    AutoTokenizer, AutoModelForCausalLM,
    BitsAndBytesConfig, GenerationConfig,
    TrainingArguments, Trainer, DataCollatorForLanguageModeling
)
from peft import (
    LoraConfig, get_peft_model,
    prepare_model_for_kbit_training
)
from huggingface_hub import login
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import re

In [None]:
from huggingface_hub import login

# Log in to Hugging Face without embedding the access token in the notebook.
# The token is read from the HF_TOKEN environment variable; when that variable
# is unset, None is passed and `login` falls back to its own prompt.
login(token=os.environ.get("HF_TOKEN"))

## **Load pretrained LLM**

In [None]:
import torch
from transformers import pipeline

In [None]:
# Hugging Face Hub id of the base checkpoint that is quantized and fine-tuned below.
MODEL_NAME = "meta-llama/Llama-3.2-3B-Instruct"

# 4-bit NF4 quantization with double quantization enabled; bfloat16 is the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# Load the base model with the quantization config; device_map="auto" leaves
# device placement to the library.
# NOTE(review): trust_remote_code=True permits Hub-hosted model code to execute
# locally — confirm it is actually needed for this checkpoint.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    trust_remote_code=True,
    quantization_config=bnb_config
)

# The pad token is aliased to the EOS token, so padding positions and genuine
# end-of-sequence tokens share one token id in everything that follows.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenizer.pad_token = tokenizer.eos_token

In [None]:
# Turn on gradient checkpointing, then run PEFT's k-bit preparation step on the
# quantized model before any adapter is attached.
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

In [None]:
# LoRA adapter settings: rank 16, alpha 32, 5% dropout, no bias terms, applied to
# the seven projection modules named in target_modules.
# Note: the name `config` is rebound to a PeftConfig in the Inference section.
config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=[
        "q_proj",
        "up_proj",
        "o_proj",
        "k_proj",
        "down_proj",
        "gate_proj",
        "v_proj"
    ],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

# Wrap the prepared base model with the adapter configuration above.
model = get_peft_model(model, config)

In [None]:
def print_trainable_parameters(model):
    """
    Report how many of the model's parameters will receive gradients.

    Walks ``model.named_parameters()``, totals the element count of every
    parameter and of those whose ``requires_grad`` flag is set, and prints both
    totals together with the trainable share expressed as a percentage.
    """
    # One pass over the parameters; each entry is (element count, trainable flag).
    sizes = [(p.numel(), p.requires_grad) for _, p in model.named_parameters()]
    all_param = sum(count for count, _ in sizes)
    trainable_params = sum(count for count, wants_grad in sizes if wants_grad)

    summary = f"trainable params: {trainable_params} || all params: {all_param} || trainables%: {100 * trainable_params / all_param}"
    print(summary)

print_trainable_parameters(model)

trainable params: 24313856 || all params: 1827777536 || trainables%: 1.330241537665993


In [None]:
print(f"VRAM free: {torch.cuda.mem_get_info()[0] / 1024**3:.2f} GB")

VRAM free: 18.08 GB


## **Test pretrained model performance**

In [None]:
from transformers import GenerationConfig

In [None]:
# Greedy decoding settings for the baseline check. Sampling is disabled, so no
# temperature is set: with do_sample=False it would be ignored and only trigger
# a warning about unused sampling flags.
generation_config = GenerationConfig(
    max_new_tokens=50,
    do_sample=False,
    num_return_sequences=1,
    pad_token_id=tokenizer.eos_token_id,
    eos_token_id=tokenizer.eos_token_id,
    repetition_penalty=1.3
)

In [None]:
# System message used for every chat built in this notebook (baseline test,
# training samples, evaluation and inference prompts).
LLAMA3_SYSTEM_PROMPT = """You are a helpful AI assistant developed by Meta. Respond safely and accurately."""

In [None]:
# Test pretrained model performance
prompt = [
    {
        "role": "system",
        "content": LLAMA3_SYSTEM_PROMPT
    },
    {
        "role": "user",
        "content": """Analyze the given passage and question. Choose the best answer from the options below.

### Passage:
[This passage is adapted from George Eliot, Silas Marner. Originally published in 1861. Silas was a weaver and a notorious miser, but then the gold he had hoarded was stolen. Shortly after, Silas adopted a young child, Eppie, the daughter of an impoverished woman who had died suddenly.

    Unlike the gold which needed nothing, and must
be worshipped in close-locked solitude—which was
hidden away from the daylight, was deaf to the song
of birds, and started to no human tones—Eppie was a
creature of endless claims and ever-growing desires,
seeking and loving sunshine, and living sounds, and
living movements; making trial of everything, with
trust in new joy, and stirring the human kindness in
all eyes that looked on her. The gold had kept his
thoughts in an ever-repeated circle, leading to
nothing beyond itself; but Eppie was an object
compacted of changes and hopes that forced his
thoughts onward, and carried them far away from
their old eager pacing towards the same blank
limit—carried them away to the new things that
would come with the coming years, when Eppie
would have learned to understand how her father
Silas cared for her; and made him look for images of
that time in the ties and charities that bound together
the families of his neighbors. The gold had asked that
he should sit weaving longer and longer, deafened
and blinded more and more to all things except the
monotony of his loom and the repetition of his web;
but Eppie called him away from his weaving, and
made him think all its pauses a holiday, reawakening
his senses with her fresh life, even to the old
winter-flies that came crawling forth in the early
spring sunshine, and warming him into joy because
she had joy.
    And when the sunshine grew strong and lasting,
so that the buttercups were thick in the meadows,
Silas might be seen in the sunny mid-day, or in the
late afternoon when the shadows were lengthening
under the hedgerows, strolling out with uncovered
head to carry Eppie beyond the Stone-pits to where
the flowers grew, till they reached some favorite bank
where he could sit down, while Eppie toddled to
pluck the flowers, and make remarks to the winged
things that murmured happily above the bright
petals, calling “Dad-dad’s” attention continually by
bringing him the flowers. Then she would turn her
ear to some sudden bird-note, and Silas learned to
please her by making signs of hushed stillness, that
they might listen for the note to come again: so that
when it came, she set up her small back and laughed
with gurgling triumph. Sitting on the banks in this
way, Silas began to look for the once familiar herbs
again; and as the leaves, with their unchanged outline
and markings, lay on his palm, there was a sense of
crowding remembrances from which he turned away
timidly, taking refuge in Eppie’s little world, that lay
lightly on his enfeebled spirit.
    As the child’s mind was growing into knowledge,
his mind was growing into memory: as her life
unfolded, his soul, long stupefied in a cold narrow
prison, was unfolding too, and trembling gradually
into full consciousness.
    It was an influence which must gather force with
every new year: the tones that stirred Silas’ heart
grew articulate, and called for more distinct answers;
shapes and sounds grew clearer for Eppie’s eyes and
ears, and there was more that “Dad-dad” was
imperatively required to notice and account for.
Also, by the time Eppie was three years old, she
developed a fine capacity for mischief, and for
devising ingenious ways of being troublesome, which
found much exercise, not only for Silas’ patience, but
for his watchfulness and penetration. Sorely was poor
Silas puzzled on such occasions by the incompatible
demands of love.]

### Question:
Which statement best describes a technique the narrator uses to represent Silas's character before he adopted Eppie?

### Choices:
A) The narrator emphasizes Silas's former obsession with wealth by depicting his gold as requiring certain behaviors on his part.
B) The narrator underscores Silas's former greed by describing his gold as seeming to reproduce on its own.
C) The narrator hints at Silas's former antisocial attitude by contrasting his present behavior toward his neighbors with his past behavior toward them.
D) The narrator demonstrates Silas's former lack of self-awareness by implying that he is unable to recall life before Eppie.

Respond ONLY with the letter and full text of the correct answer choice."""
    }
]

In [None]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Render the chat as text, ending with the assistant header so the model answers next.
chat_text = tokenizer.apply_chat_template(
    prompt,
    add_generation_prompt=True,
    tokenize=False
)

# The rendered template already begins with <|begin_of_text|>; tokenizing with
# add_special_tokens=False keeps the tokenizer from prepending a second BOS token.
inputs = tokenizer(
    chat_text,
    add_special_tokens=False,
    return_tensors="pt"
).to(device)

# Generate output
with torch.no_grad():
    outputs = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        generation_config=generation_config,
    )

# Decode only the tokens produced after the prompt. The returned sequence starts
# with the prompt itself, and no "<|assistant|>" marker survives decoding with
# skip_special_tokens=True, so slicing by prompt length is the reliable way to
# isolate the model's answer.
prompt_length = inputs["input_ids"].shape[1]
output_text = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

print(output_text.strip())

## **Fine-tuning LLM**

In [None]:
data = load_dataset('emozilla/sat-reading')

In [None]:
data

DatasetDict({
    train: Dataset({
        features: ['text', 'answer', 'requires_line', 'id'],
        num_rows: 298
    })
    validation: Dataset({
        features: ['text', 'answer', 'requires_line', 'id'],
        num_rows: 39
    })
    test: Dataset({
        features: ['text', 'answer', 'requires_line', 'id'],
        num_rows: 38
    })
})

In [None]:
type(data)

datasets.dataset_dict.DatasetDict

In [None]:
data["train"]

Dataset({
    features: ['text', 'answer', 'requires_line', 'id'],
    num_rows: 298
})

In [None]:
print(data["train"]['text'][0])

SAT READING COMPREHENSION TEST

This passage is adapted from George Eliot, Silas Marner.
Originally published in 1861. Silas was a weaver and a
notorious miser, but then the gold he had hoarded was
stolen. Shortly after, Silas adopted a young child, Eppie, the
daughter of an impoverished woman who had died
suddenly.

    Unlike the gold which needed nothing, and must
be worshipped in close-locked solitude—which was
hidden away from the daylight, was deaf to the song
of birds, and started to no human tones—Eppie was a
creature of endless claims and ever-growing desires,
seeking and loving sunshine, and living sounds, and
living movements; making trial of everything, with
trust in new joy, and stirring the human kindness in
all eyes that looked on her. The gold had kept his
thoughts in an ever-repeated circle, leading to
nothing beyond itself; but Eppie was an object
compacted of changes and hopes that forced his
thoughts onward, and carried them far away from
their old eager pacing towa

In [None]:
print(data["train"]["answer"][0])

A


In [None]:
import re

def extract_sections(text):
    """Parse raw SAT text into structured sections.

    Args:
        text: One raw sample. Everything after the last
            'SAT READING COMPREHENSION TEST' header and before the first
            'Answer:' marker is treated as content, split into blocks on
            blank lines.

    Returns:
        dict with
            'passage': the blocks preceding the first block that starts with
                'Question', joined with single newlines;
            'question': the non-choice lines of the question block (its first
                line, the 'Question ...' header, is skipped), joined with spaces;
            'choices': the 'A)'..'D)' lines, with wrapped continuation lines
                folded into the choice they belong to;
            'answer_letter': the first character after the last 'Answer:'
                marker, or '' when the marker is absent or nothing follows it.
    """
    sections = {
        'passage': '',
        'question': '',
        'choices': [],
        'answer_letter': ''
    }

    # Only read an answer when the marker is really there; splitting a text
    # without 'Answer:' would otherwise hand back the whole text and turn its
    # first character into the "answer".
    if 'Answer:' in text:
        answer_part = text.rsplit('Answer:', 1)[-1].strip()
        sections['answer_letter'] = answer_part[:1]

    content = text.split('SAT READING COMPREHENSION TEST')[-1].split('Answer:')[0]
    blocks = [b.strip() for b in content.split('\n\n') if b.strip()]

    # The passage is every block that comes before the question block.
    passage_blocks = []
    for block in blocks:
        if block.startswith('Question'):
            break
        passage_blocks.append(block)
    sections['passage'] = '\n'.join(passage_blocks).strip()

    for block in blocks:
        if not block.startswith('Question'):
            continue

        question_lines = []
        choice_lines = []

        # lines[0] is the 'Question ...' header itself.
        for line in block.split('\n')[1:]:
            stripped = line.strip()
            if re.match(r'^[A-D]\)', stripped):
                choice_lines.append(stripped)
            elif choice_lines:
                # A line after the first choice is a wrapped continuation of the
                # previous choice, not part of the question.
                if stripped:
                    choice_lines[-1] = f"{choice_lines[-1]} {stripped}"
            else:
                question_lines.append(stripped)

        sections['question'] = ' '.join(question_lines).strip()
        sections['choices'] = choice_lines

    return sections

In [None]:
def map_answer(text, letter):
    """Resolve an answer letter to the full choice line it labels.

    The choices are parsed from ``text`` with ``extract_sections``; the first
    one starting with ``"<letter>)"`` is returned. When no choice matches, the
    letter is returned unchanged.
    """
    prefix = f"{letter})"
    matching = (choice for choice in extract_sections(text)['choices'] if choice.startswith(prefix))
    return next(matching, letter)

In [None]:
def generate_prompt(text, answer_letter):
    """Build the three-message chat (system, user, assistant) for one sample.

    The user turn holds the passage, question and choices parsed from ``text``;
    the assistant turn holds the answer produced by ``map_answer`` for
    ``answer_letter``.
    """
    parsed = extract_sections(text)
    passage = parsed['passage']
    question = parsed['question']
    choices_text = '\n'.join(parsed['choices'])

    system_message = {"role": "system", "content": LLAMA3_SYSTEM_PROMPT}

    user_content = f"""Read the passage and answer the question.

### Passage:
{passage}

### Question:
{question}

### Choices:
{choices_text}

Respond with ONLY the letter and full text of the correct answer."""
    user_message = {"role": "user", "content": user_content}

    assistant_message = {"role": "assistant", "content": map_answer(text, answer_letter)}

    return [system_message, user_message, assistant_message]


In [None]:
def generate_and_tokenize_prompt(user_input, answer):
    """Turn one raw sample into fixed-length token tensors for training.

    Args:
        user_input: Raw sample text, passed to ``generate_prompt``.
        answer: Answer for the sample, passed to ``generate_prompt``.

    Returns:
        dict with 'input_ids', 'attention_mask' and 'labels' (each the first
        row of the tokenizer output, padded/truncated to 1506 tokens), or None
        when any step raises; the error is printed in that case.

    Notes:
        Labels copy the input ids but are set to -100 wherever the attention
        mask is 0, so padding never contributes to the loss while real tokens
        (including the real end-of-turn token) keep their labels.
        NOTE(review): truncation=True cuts from the end of the sequence by
        default, which is where the assistant answer sits — confirm no sample
        exceeds max_length.
    """
    try:
        full_prompt = generate_prompt(user_input, answer)

        prompt_str = tokenizer.apply_chat_template(
            full_prompt,
            tokenize=False,
            add_generation_prompt=False
        )

        # The rendered template already carries <|begin_of_text|>; without
        # add_special_tokens=False the tokenizer would prepend a second BOS.
        tokenized = tokenizer(
            prompt_str,
            add_special_tokens=False,
            padding="max_length",
            truncation=True,
            max_length=1506,
            return_tensors="pt"
        )

        input_ids = tokenized["input_ids"][0]
        attention_mask = tokenized["attention_mask"][0]

        labels = input_ids.clone()
        labels[attention_mask == 0] = -100  # ignore padded positions in the loss

        return {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "labels": labels
        }

    except Exception as e:
        print(f"Error processing sample: {e}")
        return None

In [None]:
# Take the first training sample and show the chat text it is rendered into.
sample_text = data["train"]['text'][0]
sample_answer = data["train"]['answer'][0]

example_messages = generate_prompt(sample_text, sample_answer)
# add_generation_prompt=False: the chat already ends with the assistant's answer.
prompt_text = tokenizer.apply_chat_template(
    example_messages,
    tokenize=False,
    add_generation_prompt=False
)
print("=== Generated Prompt ===")
print(prompt_text)

=== Generated Prompt ===
<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 21 Apr 2025

You are a helpful AI assistant developed by Meta. Respond safely and accurately.<|eot_id|><|start_header_id|>user<|end_header_id|>

Read the passage and answer the question.

### Passage:
This passage is adapted from George Eliot, Silas Marner.
Originally published in 1861. Silas was a weaver and a
notorious miser, but then the gold he had hoarded was
stolen. Shortly after, Silas adopted a young child, Eppie, the
daughter of an impoverished woman who had died
suddenly.
Unlike the gold which needed nothing, and must
be worshipped in close-locked solitude—which was
hidden away from the daylight, was deaf to the song
of birds, and started to no human tones—Eppie was a
creature of endless claims and ever-growing desires,
seeking and loving sunshine, and living sounds, and
living movements; making trial of everything, with
trust in new joy, and

In [None]:
# Tokenize the same sample and decode it back as a sanity check.
# generate_and_tokenize_prompt returns None on failure, hence the guard.
tokenized_sample = generate_and_tokenize_prompt(sample_text, sample_answer)
if tokenized_sample:
    print("\n=== Tokenized Sample ===")
    print(f"Input IDs shape: {tokenized_sample['input_ids'].shape}")
    print(f"Sample decoded back:")
    print(tokenizer.decode(tokenized_sample['input_ids'], skip_special_tokens=True))


=== Tokenized Sample ===
Input IDs shape: torch.Size([1506])
Sample decoded back:
system

Cutting Knowledge Date: December 2023
Today Date: 21 Apr 2025

You are a helpful AI assistant developed by Meta. Respond safely and accurately.user

Read the passage and answer the question.

### Passage:
This passage is adapted from George Eliot, Silas Marner.
Originally published in 1861. Silas was a weaver and a
notorious miser, but then the gold he had hoarded was
stolen. Shortly after, Silas adopted a young child, Eppie, the
daughter of an impoverished woman who had died
suddenly.
Unlike the gold which needed nothing, and must
be worshipped in close-locked solitude—which was
hidden away from the daylight, was deaf to the song
of birds, and started to no human tones—Eppie was a
creature of endless claims and ever-growing desires,
seeking and loving sunshine, and living sounds, and
living movements; making trial of everything, with
trust in new joy, and stirring the human kindness in
all eyes 

In [None]:
from sklearn.model_selection import train_test_split

# Tokenize every training row. The raw text and the bare answer letter are
# passed straight through: generate_prompt() parses the text and expands the
# letter into the full choice itself, so expanding it here as well would only
# work through map_answer's "return the input unchanged" fallback.
training_samples = []
for sample in tqdm(data['train']):
    try:
        tokenized_sample = generate_and_tokenize_prompt(sample['text'], sample['answer'].strip())
        # None marks a sample that failed to tokenize; it is skipped.
        if tokenized_sample is not None:
            training_samples.append(tokenized_sample)
    except Exception as e:
        print(f"Skipping invalid sample: {e}")

# Hold out 10% of the tokenized samples for evaluation during training.
train_samples, val_samples = train_test_split(training_samples, test_size=0.1, random_state=42)
train_dataset = Dataset.from_list(train_samples)
eval_dataset = Dataset.from_list(val_samples)

100%|██████████| 298/298 [00:00<00:00, 335.27it/s]


In [None]:
# Check the token length of every sample in both splits. All samples are padded
# or truncated to one max_length during tokenization, so min, max and average
# are expected to coincide.
train_lengths = [len(x["input_ids"]) for x in train_dataset]
eval_lengths = [len(x["input_ids"]) for x in eval_dataset]
all_lengths = train_lengths + eval_lengths
print(f"Min: {min(all_lengths)}, Max: {max(all_lengths)}, Avg: {sum(all_lengths)/len(all_lengths):.1f}")

Min: 1506, Max: 1506, Avg: 1506.0


### **Training**

In [None]:
from transformers import TrainerCallback
from rich.console import Console
from rich.table import Table

class LogLossCallback(TrainerCallback):
    """Trainer callback that collects training-loss logs in a rich table.

    Each log event carrying a "loss" entry adds one row (step, loss) the first
    time that step is seen. The whole accumulated table is printed whenever the
    step number is a multiple of 10.
    """

    def __init__(self):
        self.console = Console()
        loss_table = Table(show_header=True, header_style="bold magenta")
        for heading in ("Step", "Training Loss"):
            loss_table.add_column(heading, justify="right")
        self.table = loss_table
        # Steps already added to the table, so a step is never recorded twice.
        self.logged_steps = set()

    def on_log(self, args, state, control, logs=None, **kwargs):
        # Only training-loss events are of interest.
        if logs is None or "loss" not in logs:
            return

        current_step = state.global_step
        if current_step in self.logged_steps:
            return

        self.table.add_row(str(current_step), f"{logs['loss']:.6f}")
        self.logged_steps.add(current_step)

        if current_step % 10 == 0:
            self.console.print(self.table)


In [None]:
from transformers import TrainingArguments, Trainer, DataCollatorForLanguageModeling

# Training configuration: effective batch size 2 (1 x 2 accumulation steps),
# cosine schedule with 5% warmup, evaluation and checkpointing every 50 steps,
# best checkpoint (lowest eval loss) restored at the end.
# NOTE(review): fp16=True is combined with a bfloat16 quantization compute dtype
# in the model-loading cell — confirm the mixed-precision mode is intentional.
training_args = TrainingArguments(
    per_device_train_batch_size=1,
    gradient_accumulation_steps=2,
    num_train_epochs=2,
    learning_rate=2e-4,
    fp16=True,
    save_total_limit=3,
    logging_steps=10,
    output_dir="llama3-8b-sat-reading",
    optim="paged_adamw_8bit",
    lr_scheduler_type="cosine",
    warmup_ratio=0.05,
    eval_strategy="steps",
    eval_steps=50,
    save_strategy="steps",
    save_steps=50,
    load_best_model_at_end=True,
    metric_for_best_model="loss",
    greater_is_better=False,
    report_to="none",
    remove_unused_columns=False
)


def collate_with_masked_padding(features):
    """Stack pre-tokenized samples and mask only padded positions in the labels.

    The tokenizer's pad token is the EOS token, so a collator that masks labels
    by pad-token id also hides every genuine end-of-turn token from the loss.
    Masking by attention mask instead keeps those tokens as training targets.

    Args:
        features: list of dataset rows, each holding equal-length 'input_ids'
            and 'attention_mask' sequences.

    Returns:
        dict of tensors with 'input_ids', 'attention_mask' and 'labels', where
        labels equal input_ids except for -100 at attention_mask == 0.
    """
    batch = {
        key: torch.tensor([feature[key] for feature in features])
        for key in ("input_ids", "attention_mask")
    }
    batch["labels"] = batch["input_ids"].masked_fill(batch["attention_mask"] == 0, -100)
    return batch


data_collator = collate_with_masked_padding

# Training-time model settings, applied before the Trainer is built.
model.config.use_cache = False
model.enable_input_require_grads()

# The model is deliberately not wrapped with torch.compile here: the Trainer
# keeps its own reference to the model it was given, so a wrapper created
# afterwards never takes part in training and would only replace `model` for
# the generation and saving cells that follow.
trainer = Trainer(
    model=model,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    args=training_args,
    data_collator=data_collator,
    callbacks=[LogLossCallback()]
)

trainer.train()

### **Test prediction**

In [None]:
print(data["test"]['text'][1])

SAT READING COMPREHENSION TEST

This passage is adapted from Mary Helen Stefaniak, The
Cailiffs of Baghdad, Georgia: A Novel. ©2010 by Mary Helen
Stefaniak.

    Miss Grace Spivey arrived in Threestep, Georgia,
in August 1938. She stepped off the train wearing a
pair of thick-soled boots suitable for hiking, a navy
Line blue dress, and a little white tam that rode the waves
of her red hair at a gravity-defying angle. August was
a hellish month to step off the train in Georgia,
although it was nothing, she said, compared to the
119 degrees that greeted her when she arrived one
time in Timbuktu, which, she assured us, was a real
place in Africa. I believe her remark irritated some of
the people gathered to welcome her on the burned
grass alongside the tracks. When folks are sweating
through their shorts, they don’t like to hear that this
is nothing compared to someplace else. Irritated or
not, the majority of those present were inclined to see
the arrival of the new schoolteacher in a po

In [None]:
print("Ground truth: ", data["test"]['answer'][1])

Ground truth:  D


In [None]:
import torch
import re
from transformers import GenerationConfig

# Greedy decoding for evaluation. With do_sample=False the sampling-only knobs
# (temperature, top_p) have no effect, so they are left unset rather than
# triggering warnings about unused sampling flags.
generation_config = GenerationConfig(
    max_new_tokens=64,
    do_sample=False,
    repetition_penalty=1.0,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.pad_token_id
)

def extract_answer(output_text):
    """Pull the answer line out of decoded model output.

    The text after the last "<|assistant|>" marker (or, when that marker is
    absent, after the last occurrence of "assistant") is searched for the first
    line shaped like "X) ..." with X in A-D; the answer text stops at a newline
    or an opening parenthesis. Without such a line, the first line that does
    not start with "**" is returned, and if every line does, the whole
    extracted part is returned.
    """
    marker = "<|assistant|>" if "<|assistant|>" in output_text else "assistant"
    answer_part = output_text.split(marker)[-1].strip()

    found = re.search(r"^([A-D])\)\s*([^\n\(]+)", answer_part, re.MULTILINE)
    if found is not None:
        letter, body = found.groups()
        return f"{letter}) {body.strip()}"

    # Fallback: first line that is not a "**"-prefixed line.
    for line in answer_part.split('\n'):
        if not line.startswith('**'):
            return line.strip()
    return answer_part

def format_test_prompt(text, answer_letter=None):
    """Format input text as a chat conversation (for prediction or test).

    Returns the system and user messages built from the sections parsed out of
    ``text``. When ``answer_letter`` is truthy, an assistant message holding the
    answer produced by ``map_answer`` is added as a third message.
    """
    parsed = extract_sections(text)
    passage, question = parsed['passage'], parsed['question']

    # Build choices block
    choices_text = '\n'.join(parsed['choices'])

    user_prompt = f"""Read the passage and answer the question.

### Passage:
{passage}

### Question:
{question}

### Choices:
{choices_text}

Respond with ONLY the letter and full text of the correct answer."""

    messages = [
        dict(role="system", content=LLAMA3_SYSTEM_PROMPT),
        dict(role="user", content=user_prompt),
    ]

    if not answer_letter:
        return messages

    return messages + [dict(role="assistant", content=map_answer(text, answer_letter))]

def predict(text):
    """Generate the model's answer for one raw SAT sample.

    Args:
        text: Raw sample text; it is formatted into a chat with
            ``format_test_prompt``.

    Returns:
        The answer line extracted by ``extract_answer`` from the newly
        generated tokens only.

    Side effects:
        Prints the full decoded sequence (prompt plus completion) for debugging.
    """
    messages = format_test_prompt(text)

    prompt_text = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=False
    )

    # The rendered template already starts with the BOS token, so the tokenizer
    # must not add its special tokens a second time.
    inputs = tokenizer(prompt_text, add_special_tokens=False, return_tensors="pt").to(device)

    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            generation_config=generation_config
        )

    # The generated sequence starts with the prompt; everything after
    # prompt_length is the model's own completion.
    prompt_length = inputs["input_ids"].shape[1]
    output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    completion_text = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

    # === DEBUG ===
    print("=== Full Decoded Output ===")
    print(output_text)

    # Only answer: parsing the completion alone keeps the word "assistant" in
    # the system prompt or the passage from affecting where the answer starts.
    return extract_answer(completion_text)


In [None]:
# Spot-check a single test sample: run the model and show its answer next to
# the ground-truth choice.
test_sample_idx = 4
input_text = data["test"]['text'][test_sample_idx]
true_answer = data["test"]['answer'][test_sample_idx]

predicted_answer = predict(input_text)

# Expand the ground-truth letter into the full choice line for display.
true_answer_full = map_answer(input_text, true_answer)

print("\n=== Final Result ===")
print(f"[Model Prediction]\n{predicted_answer}")
print(f"\n[Ground Truth]\n{true_answer_full}")

=== Full Decoded Output ===
system

Cutting Knowledge Date: December 2023
Today Date: 21 Apr 2025

You are a helpful AI assistant developed by Meta. Respond safely and accurately.user

Read the passage and answer the question.

### Passage:
This passage is adapted from Mary Helen Stefaniak, The
Cailiffs of Baghdad, Georgia: A Novel. ©2010 by Mary Helen
Stefaniak.
Miss Grace Spivey arrived in Threestep, Georgia,
in August 1938. She stepped off the train wearing a
pair of thick-soled boots suitable for hiking, a navy
Line blue dress, and a little white tam that rode the waves
of her red hair at a gravity-defying angle. August was
a hellish month to step off the train in Georgia,
although it was nothing, she said, compared to the
119 degrees that greeted her when she arrived one
time in Timbuktu, which, she assured us, was a real
place in Africa. I believe her remark irritated some of
the people gathered to welcome her on the burned
grass alongside the tracks. When folks are sweating
thro

## **Evaluation**

In [None]:
from tqdm import tqdm

def _extract_choice_letter(prediction):
    """Return the answer letter (A-D) named by a prediction string, or None."""
    text = prediction.strip()

    # Preferred: the prediction opens with the choice label, e.g. "B) ...",
    # "(b)", "C." or a bare "D". Lower case is accepted only in this position.
    leading = re.match(r'\(?([A-Da-d])(?:[).:]|\s*$)', text)
    if leading:
        return leading.group(1).upper()

    # Otherwise take the first "X)" label, then the first standalone capital
    # letter. Case is respected so the article "a" in free text is not read as
    # choice A.
    embedded = re.search(r'\b([A-D])\)', text) or re.search(r'\b([A-D])\b', text)
    return embedded.group(1) if embedded else None


def evaluate(test_dataset, max_samples=None):
    """
    Evaluate model accuracy on test set

    Args:
        test_dataset: Dataset object containing 'text' and 'answer'
        max_samples: Optional limit for quick testing

    Returns:
        dict with 'accuracy' (correct / total, 0 when nothing was scored),
        'total' (number of samples scored) and 'wrong_samples' (list of dicts
        with 'text', 'predicted' and 'true' for every miss).

    Samples whose processing raises are reported and left out of the totals.
    """
    correct = 0
    total = 0
    wrong_samples = []

    # Read both columns once instead of re-reading them on every iteration.
    subset = test_dataset[:max_samples]
    texts = subset['text']
    answers = subset['answer']

    # Process samples with progress bar
    for idx in tqdm(range(len(texts))):
        try:
            text = texts[idx]
            true_answer = answers[idx].strip().upper()

            # Get model prediction
            predicted = predict(text)

            # Extract the choice letter named by the prediction
            predicted_letter = _extract_choice_letter(predicted)

            # Compare with ground truth
            if predicted_letter == true_answer:
                correct += 1
            else:
                wrong_samples.append({
                    'text': text,
                    'predicted': predicted,
                    'true': true_answer
                })

            total += 1

        except Exception as e:
            print(f"Error processing sample {idx}: {str(e)}")

    # Calculate metrics
    accuracy = correct / total if total > 0 else 0

    # Print summary
    print(f"\n=== Evaluation Results ===")
    print(f"Correct: {correct}/{total}")
    print(f"Accuracy: {accuracy:.2%}")
    print(f"Wrong samples saved in 'wrong_samples' list")

    return {
        'accuracy': accuracy,
        'total': total,
        'wrong_samples': wrong_samples
    }

# Usage
test_results = evaluate(data['test'])

In [None]:
# Show up to five misclassified samples with the first 200 characters of their
# passage for a quick manual review.
for i, sample in enumerate(test_results['wrong_samples'][:5]):
    print(f"### Sample {i+1}")
    print(f"Ground Truth: {sample['true']}")
    print(f"Predicted: {sample['predicted']}")
    print("Passage:")
    print(extract_sections(sample['text'])['passage'][:200] + "...")
    print("\n" + "-"*50)

### Sample 1
Ground Truth: D
Predicted: A) sympathy, because they assume that she is experiencing intense heat for the first time.
Passage:
This passage is adapted from Mary Helen Stefaniak, The
Cailiffs of Baghdad, Georgia: A Novel. ©2010 by Mary Helen
Stefaniak.
Miss Grace Spivey arrived in Threestep, Georgia,
in August 1938. She steppe...

--------------------------------------------------
### Sample 2
Ground Truth: A
Predicted: D) Miss Spivey herself.
Passage:
This passage is adapted from Mary Helen Stefaniak, The
Cailiffs of Baghdad, Georgia: A Novel. ©2010 by Mary Helen
Stefaniak.
Miss Grace Spivey arrived in Threestep, Georgia,
in August 1938. She steppe...

--------------------------------------------------
### Sample 3
Ground Truth: C
Predicted: A) delighted.
Passage:
This passage is adapted from Mary Helen Stefaniak, The
Cailiffs of Baghdad, Georgia: A Novel. ©2010 by Mary Helen
Stefaniak.
Miss Grace Spivey arrived in Threestep, Georgia,
in August 1938. She steppe...

-------

In [None]:
model.save_pretrained("trained-model")

## **Inference**

In [None]:
from peft import PeftConfig, PeftModel

def format_inference_prompt(text):
    """Build the system + user chat messages for one raw SAT sample.

    The user message uses the same layout as the training prompts and
    ``custom_predict`` (each ``###`` header on its own line, a blank line
    between sections), so the model is queried in the format it was tuned on.

    Args:
        text: Raw sample text, parsed with ``extract_sections``.

    Returns:
        list of two message dicts (roles "system" and "user").
    """
    sections = extract_sections(text)
    choices_text = '\n'.join(sections['choices'])

    return [
        {"role": "system", "content": LLAMA3_SYSTEM_PROMPT},
        {"role": "user", "content": f"""Read the passage and answer the question.

### Passage:
{sections['passage']}

### Question:
{sections['question']}

### Choices:
{choices_text}

Respond with ONLY the letter and full text of the correct answer."""}
    ]

# Hub id of the adapter to load for inference.
# NOTE(review): this is a published adapter id, not the local "trained-model"
# directory saved above — confirm which weights are meant to be used here.
PEFT_MODEL = "KoiiVN/llama3-3B-peft-SAT-reading-v2"

# Load config & model
# From here on `config`, `bnb_config`, `model`, `tokenizer` and
# `generation_config` are rebound, replacing the objects of the same names
# created in the earlier sections.
config = PeftConfig.from_pretrained(PEFT_MODEL)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# Base model named by the adapter config, loaded in 4-bit, then wrapped with the adapter.
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)
model = PeftModel.from_pretrained(model, PEFT_MODEL)

# Tokenizer & generation config
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
tokenizer.pad_token = tokenizer.eos_token

# Greedy decoding (do_sample=False) with a mild repetition penalty.
# NOTE(review): temperature is set although sampling is disabled — presumably
# it has no effect; confirm and drop if so.
generation_config = GenerationConfig(
    max_new_tokens=128,
    temperature=0.01,
    do_sample=False,
    repetition_penalty=1.15,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,
    forced_eos_token_id=tokenizer.eos_token_id
)

In [None]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Inference loop: answer the first five test samples with the loaded adapter.
for i in range(5):
    print("="*100)

    input_text = data["test"]['text'][i]
    true_answer = data["test"]['answer'][i]

    messages = format_inference_prompt(input_text)
    prompt = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=False
    )

    # The rendered template already contains <|begin_of_text|>; do not let the
    # tokenizer prepend a second one.
    inputs = tokenizer(prompt, add_special_tokens=False, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs.input_ids,
            attention_mask=inputs.attention_mask,
            generation_config=generation_config
        )

    # Keep only the tokens generated after the prompt and drop special tokens,
    # so the response holds no prompt text, <|begin_of_text|> or <|eot_id|>.
    prompt_length = inputs.input_ids.shape[1]
    response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True).strip()

    print(f"=== Sample {i+1} ===")
    print(f"[Question]\n{messages[1]['content']}")
    print(f"\n[Ground Truth] {true_answer}")
    print(f"[Prediction] {response}")
    print("\n" + "-"*50)

In [None]:
def custom_predict(passage: str, question: str, choices: list):
    """Answer a user-supplied multiple-choice reading question.

    Args:
        passage: Passage text.
        question: Question text.
        choices: Choice strings (e.g. "A) ..."); they are joined with newlines.

    Returns:
        The model's response as plain text: only the tokens generated after the
        prompt, decoded without special tokens and stripped of surrounding
        whitespace.
    """
    choices_text = '\n'.join(choices)

    messages = [
        {
            "role": "system",
            "content": LLAMA3_SYSTEM_PROMPT
        },
        {
            "role": "user",
            "content": f"""Read the passage and answer the question.

### Passage:
{passage}

### Question:
{question}

### Choices:
{choices_text}

Respond with ONLY the letter and full text of the correct answer."""
        }
    ]

    prompt = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=False
    )

    # The rendered template already contains the BOS token; adding special
    # tokens again would duplicate it.
    inputs = tokenizer(prompt, add_special_tokens=False, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs.input_ids,
            attention_mask=inputs.attention_mask,
            generation_config=generation_config
        )

    # Slice off the prompt by length rather than by string replacement, which
    # leaves special tokens behind whenever the decoded text differs from the
    # prompt string.
    prompt_length = inputs.input_ids.shape[1]
    response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True).strip()

    return response

In [None]:
custom_passage = """
This passage is adapted from F. Scott Fitzgerald, The Great Gatsby.
"In my younger and more vulnerable years my father gave me some advice that I’ve been turning over in my mind ever since. ‘Whenever you feel like criticizing anyone,’ he told me, ‘just remember that all the people in this world haven’t had the advantages that you’ve had.’ He didn’t say any more, but we’ve always been unusually communicative in a reserved way, and I understood that he meant a great deal more than that. In consequence, I’m inclined to reserve all judgments, a habit that has opened up many curious natures to me and also made me the victim of not a few veteran bores."
"""

custom_question = "What is the primary purpose of the narrator’s recollection of his father’s advice?"
custom_choices = [
    "A) To explain his reluctance to judge others", #A is correct, just test
    "B) To highlight his privileged upbringing",
    "C) To criticize his father’s moral values",
    "D) To foreshadow future conflicts in the story"
]

prediction = custom_predict(custom_passage, custom_question, custom_choices)
print("\n=== Custom Test Result ===")
print("[Prediction]", prediction)


=== Custom Test Result ===
[Prediction] <|begin_of_text|>A) To explain his reluctance to judge others<|eot_id|>
