In [None]:
! pip install transformers[torch]==4.55.0
! pip install accelerate
! pip install datasets
! pip install trl==0.22.0
! pip install tf-keras
! pip install numpy=1.26.0
# ssh tunneling to view tensor board
# ssh -L 6006:localhost:6006 ubuntu@129.159.45.31

# Start tensorboard UI
# tensorboard --logdir ./logs/test/ --host 127.0.0.1 --port 6006 

# Docs: https://huggingface.co/docs/trl/en/sft_trainer (version=V0.19.0)
# https://www.datacamp.com/tutorial/fine-tuning-qwen3

In [1]:
SYSTEM_PROMPT = '''You are a language learning evaluator assessing the complexity of an English sentence given its context.

Rubric:
1 (A1) – Very basic words and phrases; simple self-introduction; minimal grammar.
2 (A2) – Simple sentences; familiar everyday expressions; limited range.
3 (B1) – Can write or speak in connected sentences about familiar topics; some errors.
4 (B2) – Generally fluent; can discuss abstract topics; good grammar control.
5 (C1) – Flexible, natural use of language; few errors; advanced vocabulary.
6 (C2) – Near-native mastery; precise, nuanced expression; fully natural flow.

Please give a rating between 1-6 following the rubric above.
'''

PROMPT_TEMPLATE = '''

Context: {{ context }}
Sentence: {{ sentence }}
Rating (1-6):
'''



In [2]:
import json
from datasets import load_dataset

dataset = load_dataset("dataset/readme_folder")
print(dataset)


  from .autonotebook import tqdm as notebook_tqdm


DatasetDict({
    train: Dataset({
        features: ['Domain', 'Sub-domain', 'Paragraph', 'Context', 'Sentence', 'Rating'],
        num_rows: 3243
    })
    validation: Dataset({
        features: ['Domain', 'Sub-domain', 'Paragraph', 'Context', 'Sentence', 'Rating'],
        num_rows: 995
    })
    test: Dataset({
        features: ['Domain', 'Sub-domain', 'Paragraph', 'Context', 'Sentence', 'Rating'],
        num_rows: 995
    })
})




In [3]:

from jinja2 import Template

JINJA_PROMPT_TEMPLATE = Template(PROMPT_TEMPLATE)

def preprocess_function_wo_chat_template(example):
    prompt = SYSTEM_PROMPT + JINJA_PROMPT_TEMPLATE.render(context=example['Context'], sentence=example['Sentence'])
    return {
        "prompt": prompt,
        "completion": str(example["Rating"]),
    }


def preprocess_function_w_chat_template(example):
    prompt = JINJA_PROMPT_TEMPLATE.render(context=example['Context'], sentence=example['Sentence'])
    return {
        "prompt": [{"role": "system", "content": SYSTEM_PROMPT},
                   {"role": "user", "content":  prompt}],
        "completion": [{"role": "assistant", "content": str(example["Rating"])}]
    }

dataset = dataset.map(preprocess_function_wo_chat_template)
item = next(iter(dataset["train"]))
print(item['prompt'])
print(item['completion'])

You are a language learning evaluator assessing the complexity of an English sentence given its context.

Rubric:
1 (A1) – Very basic words and phrases; simple self-introduction; minimal grammar.
2 (A2) – Simple sentences; familiar everyday expressions; limited range.
3 (B1) – Can write or speak in connected sentences about familiar topics; some errors.
4 (B2) – Generally fluent; can discuss abstract topics; good grammar control.
5 (C1) – Flexible, natural use of language; few errors; advanced vocabulary.
6 (C2) – Near-native mastery; precise, nuanced expression; fully natural flow.

Please give a rating between 1-6 following the rubric above.


Context: In economics, the firm’s objective is assumed to be to maximize profits. Firms with  market power do this by capturing consumer surplus, and converting it to producer  surplus.
Sentence: In Figure 4.1, a monopoly finds the profit -maximizing price and quantity by  setting MR equal to MC.
Rating (1-6):
4.0


In [4]:
def compute_metrics(examples):
    print(examples)
    

In [None]:
from accelerate import notebook_launcher
from trl import SFTTrainer, SFTConfig

import transformers
import torch

def train():
    from trl import SFTTrainer, SFTConfig
    from transformers import AutoModelForCausalLM, AutoTokenizer


    model = transformers.AutoModelForCausalLM.from_pretrained(
                                            "google/gemma-3-270m-it",
                                            torch_dtype=torch.bfloat16,
                                            device_map="auto",
                                            attn_implementation="eager")
    tokenizer = transformers.AutoTokenizer.from_pretrained("google/gemma-3-270m-it")

    torch_dtype = model.dtype

    args = SFTConfig(
        output_dir="models/readme/gemma_1b",              # directory to save and repository id
        max_length=128,                         # max sequence length for model and packing of the dataset
        packing=False,                          # Groups multiple samples in the dataset into a single sequence
        num_train_epochs=5,                     # number of training epochs
        per_device_train_batch_size=4,          # batch size per device during training
        per_device_eval_batch_size=4, 
        gradient_checkpointing=False,           # Caching is incompatible with gradient checkpointing
        optim="adamw_torch_fused",              # use fused adamw optimizer
        logging_steps=10,                        # log every step
        save_strategy="epoch",                  # save checkpoint every epoch
        eval_strategy="epoch",                  # evaluate checkpoint every epoch
        learning_rate=1e-06,            # learning rate
        fp16=True if torch_dtype == torch.float16 else False,   # use float16 precision
        bf16=True if torch_dtype == torch.bfloat16 else False,  # use bfloat16 precision
        lr_scheduler_type="constant",           # use constant learning rate scheduler
        push_to_hub=False,                       # push model to hub
        # report_to="tensorboard",                # report metrics to tensorboard
        report_to="tensorboard",  # or "wandb", "comet_ml", etc.
        logging_dir="./logs/test",
        dataset_kwargs={
            "add_special_tokens": False, # Template with special tokens
            "append_concat_token": True, # Add EOS token as separator token between examples
        }
    )


    trainer = SFTTrainer(
            model,
            train_dataset=dataset["train"],
            eval_dataset=dataset["validation"],
            args=args,
            # compute_metrics=compute_metrics
        )

    trainer.train()


# This will spawn multiple GPU processes directly from Jupyter:
# notebook_launcher(train, num_processes=2)
train()


Epoch,Training Loss,Validation Loss,Entropy,Num Tokens,Mean Token Accuracy
1,0.0,,1.367174,415104.0,0.0
