## **Install and import necessary libraries**

In [None]:
# Install the notebook's dependencies quietly (-q), upgrading anything already present (-U).
# NOTE(review): none of these installs is version-pinned, and transformers, peft and
# accelerate are installed from their GitHub repositories rather than a tagged release.
!pip install -q -U bitsandbytes
!pip install -q -U datasets
!pip install -q -U git+https://github.com/huggingface/transformers.git
!pip install -q -U git+https://github.com/huggingface/peft.git
!pip install -q -U git+https://github.com/huggingface/accelerate.git
!pip install -q -U loralib
!pip install -q -U einops

In [None]:
import os
import torch
from datasets import load_dataset, Dataset
from transformers import (
    AutoTokenizer, AutoModelForCausalLM,
    BitsAndBytesConfig, GenerationConfig,
    TrainingArguments, Trainer, DataCollatorForLanguageModeling
)
from peft import (
    LoraConfig, get_peft_model,
    prepare_model_for_kbit_training
)
from huggingface_hub import login
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import re

In [None]:
from huggingface_hub import login

# Log in to Hugging Face.
# The access token is read from the HF_TOKEN environment variable so that no
# credential is ever stored in the notebook itself. When the variable is unset,
# `login` receives None and asks for the token interactively instead.
login(token=os.environ.get("HF_TOKEN"))

## **Load pretrained LLM**

In [None]:
import torch
from transformers import pipeline

In [None]:
# Load the 'emozilla/sat-reading' dataset; the result is kept in `data` for all later cells.
data = load_dataset('emozilla/sat-reading')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

(…)-00000-of-00001-be16864a4346f8b0.parquet:   0%|          | 0.00/291k [00:00<?, ?B/s]

(…)-00000-of-00001-6242383510343be0.parquet:   0%|          | 0.00/35.7k [00:00<?, ?B/s]

(…)-00000-of-00001-8026e2bb5cef708b.parquet:   0%|          | 0.00/38.7k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/298 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/39 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/38 [00:00<?, ? examples/s]

In [None]:
data

DatasetDict({
    train: Dataset({
        features: ['text', 'answer', 'requires_line', 'id'],
        num_rows: 298
    })
    validation: Dataset({
        features: ['text', 'answer', 'requires_line', 'id'],
        num_rows: 39
    })
    test: Dataset({
        features: ['text', 'answer', 'requires_line', 'id'],
        num_rows: 38
    })
})

In [None]:
type(data)

datasets.dataset_dict.DatasetDict

In [None]:
data["train"]

Dataset({
    features: ['text', 'answer', 'requires_line', 'id'],
    num_rows: 298
})

In [None]:
print(data["train"]['text'][0])

SAT READING COMPREHENSION TEST

This passage is adapted from George Eliot, Silas Marner.
Originally published in 1861. Silas was a weaver and a
notorious miser, but then the gold he had hoarded was
stolen. Shortly after, Silas adopted a young child, Eppie, the
daughter of an impoverished woman who had died
suddenly.

    Unlike the gold which needed nothing, and must
be worshipped in close-locked solitude—which was
hidden away from the daylight, was deaf to the song
of birds, and started to no human tones—Eppie was a
creature of endless claims and ever-growing desires,
seeking and loving sunshine, and living sounds, and
living movements; making trial of everything, with
trust in new joy, and stirring the human kindness in
all eyes that looked on her. The gold had kept his
thoughts in an ever-repeated circle, leading to
nothing beyond itself; but Eppie was an object
compacted of changes and hopes that forced his
thoughts onward, and carried them far away from
their old eager pacing towa

In [None]:
import re

def extract_sections(text):
    """Parse raw SAT text into structured sections.

    Everything between the 'SAT READING COMPREHENSION TEST' header and the
    first 'Answer:' marker is split into blank-line-separated blocks. Blocks
    before the first one starting with 'Question' form the passage; a block
    starting with 'Question' supplies the question text and the choices.

    Args:
        text: Raw sample text.

    Returns:
        dict with the keys:
            'passage': passage blocks joined with single newlines.
            'question': question text collapsed onto one line.
            'choices': list of choice strings, each starting with 'A)'..'D)'.
                A wrapped choice is returned as one string.
            'answer_letter': first character following the last 'Answer:'
                marker, or '' when the marker is missing or nothing follows it.
    """
    answer_marker = 'Answer:'
    sections = {
        'passage': '',
        'question': '',
        'choices': [],
        'answer_letter': ''
    }

    # Only look for an answer when the marker exists; otherwise split() would
    # hand back the whole text and its first character would be misreported
    # as the answer letter.
    if answer_marker in text:
        answer_part = text.split(answer_marker)[-1].strip()
        sections['answer_letter'] = answer_part[:1]

    content = text.split('SAT READING COMPREHENSION TEST')[-1].split(answer_marker)[0]
    blocks = [b.strip() for b in content.split('\n\n') if b.strip()]

    # The passage is every block that precedes the first question block.
    passage_blocks = []
    for block in blocks:
        if block.startswith('Question'):
            break
        passage_blocks.append(block)
    sections['passage'] = '\n'.join(passage_blocks).strip()

    for block in blocks:
        if not block.startswith('Question'):
            continue

        question_lines = []
        choice_lines = []

        # The first line of the block is the 'Question...' header and is skipped.
        for raw_line in block.split('\n')[1:]:
            line = raw_line.strip()
            if not line:
                continue
            if re.match(r'^[A-D]\)', line):
                choice_lines.append(line)
            elif choice_lines:
                # A non-choice line after a choice has started is the wrapped
                # remainder of that choice, not part of the question.
                choice_lines[-1] = f"{choice_lines[-1]} {line}"
            else:
                question_lines.append(line)

        # As before, a later question block overwrites an earlier one.
        sections['question'] = ' '.join(question_lines).strip()
        sections['choices'] = choice_lines

    return sections

In [None]:
# Run the parser on the first training sample and display the resulting dict.
text = data["train"]["text"][0]
extract_sections(text)

{'passage': 'This passage is adapted from George Eliot, Silas Marner.\nOriginally published in 1861. Silas was a weaver and a\nnotorious miser, but then the gold he had hoarded was\nstolen. Shortly after, Silas adopted a young child, Eppie, the\ndaughter of an impoverished woman who had died\nsuddenly.\nUnlike the gold which needed nothing, and must\nbe worshipped in close-locked solitude—which was\nhidden away from the daylight, was deaf to the song\nof birds, and started to no human tones—Eppie was a\ncreature of endless claims and ever-growing desires,\nseeking and loving sunshine, and living sounds, and\nliving movements; making trial of everything, with\ntrust in new joy, and stirring the human kindness in\nall eyes that looked on her. The gold had kept his\nthoughts in an ever-repeated circle, leading to\nnothing beyond itself; but Eppie was an object\ncompacted of changes and hopes that forced his\nthoughts onward, and carried them far away from\ntheir old eager pacing towards t

In [None]:
def map_answer(text, letter):
    """Return the full choice string that corresponds to an answer letter.

    The choices are parsed out of `text`; the first one beginning with
    "<letter>)" is returned. If none matches, `letter` is returned unchanged.
    """
    prefix = f"{letter})"
    options = extract_sections(text)['choices']
    return next((option for option in options if option.startswith(prefix)), letter)

In [None]:
# System prompt used as the "system" message of every chat prompt built in this notebook.
# NOTE(review): described by the author as Llama-3's official system prompt structure;
# that cannot be verified from this notebook — confirm against the model documentation.
LLAMA3_SYSTEM_PROMPT = """You are a helpful AI assistant developed by Meta. Respond safely and accurately."""

In [None]:
def generate_prompt(text, answer_letter):
    """Build the system/user/assistant chat messages for one sample.

    Args:
        text: Raw sample text; parsed into passage, question and choices.
        answer_letter: Letter of the correct choice.

    Returns:
        A list of three message dicts ("system", "user", "assistant"). The
        assistant message holds the full text of the correct choice, or the
        bare letter when no choice matches it.
    """
    sections = extract_sections(text)
    choices_text = '\n'.join(sections['choices'])

    user_content = f"""Read the passage and answer the question.

### Passage:
{sections['passage']}

### Question:
{sections['question']}

### Choices:
{choices_text}

Respond with ONLY the letter and full text of the correct answer."""

    system_message = {"role": "system", "content": LLAMA3_SYSTEM_PROMPT}
    user_message = {"role": "user", "content": user_content}
    assistant_message = {"role": "assistant", "content": map_answer(text, answer_letter)}

    return [system_message, user_message, assistant_message]


In [None]:
def generate_and_tokenize_prompt(user_input, answer, max_length=1506):
    """Render one sample with the chat template and tokenize it for training.

    Args:
        user_input: Raw sample text passed to `generate_prompt`.
        answer: Letter of the correct choice.
        max_length: Sequence length every sample is padded/truncated to.

    Returns:
        dict with 1-D tensors 'input_ids', 'attention_mask' and 'labels', or
        None when the sample could not be processed (the error is printed).
        Label positions that are padding are set to -100.
    """
    try:
        full_prompt = generate_prompt(user_input, answer)

        prompt_str = tokenizer.apply_chat_template(
            full_prompt,
            tokenize=False,
            add_generation_prompt=False
        )

        tokenized = tokenizer(
            prompt_str,
            padding="max_length",
            truncation=True,
            max_length=max_length,
            return_tensors="pt"
        )

        input_ids = tokenized["input_ids"][0]
        attention_mask = tokenized["attention_mask"][0]

        labels = input_ids.clone()
        # Exclude padding from the loss. The attention mask is used rather than
        # the pad token id because this notebook sets the pad token to the EOS
        # token, so masking by id would also hide genuine end-of-sequence labels.
        labels[attention_mask == 0] = -100

        return {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "labels": labels
        }

    except Exception as e:
        # Best effort: report the failing sample and let the caller drop it.
        print(f"Error processing sample: {e}")
        return None

## **Inference**

In [None]:
from peft import PeftConfig, PeftModel

# Hub identifier of the PEFT adapter used for inference.
PEFT_MODEL = "KoiiVN/llama3-3B-peft-SAT-reading-v2"

# Read the adapter config; it names the base model the adapter was built on.
config = PeftConfig.from_pretrained(PEFT_MODEL)
# 4-bit NF4 quantization with double quantization and bfloat16 compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# Load the quantized base model.
# NOTE(review): trust_remote_code=True allows code shipped with the model repo to run;
# confirm the base model actually needs it.
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)
# Wrap the base model with the adapter loaded from PEFT_MODEL.
model = PeftModel.from_pretrained(model, PEFT_MODEL)

# Tokenizer of the base model; the EOS token doubles as the padding token.
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
tokenizer.pad_token = tokenizer.eos_token

In [None]:
def format_inference_prompt(text):
    """Build the system/user chat messages used to query the model.

    The user message uses the same layout as `generate_prompt` and
    `custom_predict` (section header on its own line, blank line between
    sections), so the model is queried with the same prompt format throughout
    the notebook.

    Args:
        text: Raw sample text; parsed into passage, question and choices.

    Returns:
        A list of two message dicts ("system", "user"); no assistant message
        is included.
    """
    sections = extract_sections(text)
    choices_text = '\n'.join(sections['choices'])

    user_content = f"""Read the passage and answer the question.

### Passage:
{sections['passage']}

### Question:
{sections['question']}

### Choices:
{choices_text}

Respond with ONLY the letter and full text of the correct answer."""

    return [
        {"role": "system", "content": LLAMA3_SYSTEM_PROMPT},
        {"role": "user", "content": user_content}
    ]

In [None]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Greedy decoding. No temperature is set: with do_sample=False it has no effect
# on the output and only triggers a configuration warning.
generation_config = GenerationConfig(
    max_new_tokens=100,
    do_sample=False,
    repetition_penalty=1.15,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,
    forced_eos_token_id=tokenizer.eos_token_id
)

# Inference loop over the first 8 test samples
for i in range(8):
    print("="*100)

    # Index the row directly instead of materialising the whole column each time.
    sample = data["test"][i]
    input_text = sample['text']
    true_answer = sample['answer']

    messages = format_inference_prompt(input_text)
    prompt = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=False
    )

    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs.input_ids,
            attention_mask=inputs.attention_mask,
            generation_config=generation_config
        )

    # generate() returns the prompt ids followed by the new ids. Decode only the
    # new ones: searching the decoded text for a role marker or for the prompt
    # string is fragile and leaves special tokens in the prediction.
    prompt_length = inputs.input_ids.shape[-1]
    response = tokenizer.decode(
        outputs[0][prompt_length:],
        skip_special_tokens=True
    ).strip()

    print(f"=== Sample {i+1} ===")
    print(f"[Question]\n{messages[1]['content']}")
    print(f"[Prediction] {response}")
    print(f"\n[Ground Truth] {true_answer}")
    print("\n" + "-"*50)

In [None]:
def custom_predict(passage: str, question: str, choices: list):
    """Ask the model a multiple-choice question about a custom passage.

    Args:
        passage: Passage text.
        question: Question text.
        choices: List of choice strings, placed in the prompt one per line.

    Returns:
        The decoded, stripped text of the newly generated tokens only
        (prompt and special tokens excluded).
    """
    choices_text = '\n'.join(choices)

    messages = [
        {
            "role": "system",
            "content": LLAMA3_SYSTEM_PROMPT
        },
        {
            "role": "user",
            "content": f"""Read the passage and answer the question.

### Passage:
{passage}

### Question:
{question}

### Choices:
{choices_text}

Respond with ONLY the letter and full text of the correct answer."""
        }
    ]

    prompt = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=False
    )

    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs.input_ids,
            attention_mask=inputs.attention_mask,
            generation_config=generation_config
        )

    # generate() returns the prompt ids followed by the new ids. Decode only the
    # new ones: searching the decoded text for a role marker or for the prompt
    # string is fragile and leaves special tokens in the result.
    prompt_length = inputs.input_ids.shape[-1]
    response = tokenizer.decode(
        outputs[0][prompt_length:],
        skip_special_tokens=True
    ).strip()

    return response

In [None]:
# Hand-written example for trying the model on a passage outside the dataset.
custom_passage = """
This passage is adapted from F. Scott Fitzgerald, The Great Gatsby.
"In my younger and more vulnerable years my father gave me some advice that I’ve been turning over in my mind ever since. ‘Whenever you feel like criticizing anyone,’ he told me, ‘just remember that all the people in this world haven’t had the advantages that you’ve had.’ He didn’t say any more, but we’ve always been unusually communicative in a reserved way, and I understood that he meant a great deal more than that. In consequence, I’m inclined to reserve all judgments, a habit that has opened up many curious natures to me and also made me the victim of not a few veteran bores."
"""

custom_question = "What is the primary purpose of the narrator’s recollection of his father’s advice?"
custom_choices = [
    "A) To explain his reluctance to judge others",  # the author's intended correct answer for this check
    "B) To highlight his privileged upbringing",
    "C) To criticize his father’s moral values",
    "D) To foreshadow future conflicts in the story"
]

# Query the model and print its answer.
prediction = custom_predict(custom_passage, custom_question, custom_choices)
print("\n=== Custom Test Result ===")
print("[Prediction]", prediction)