In [None]:
! pip install torch
! pip install transformers[torch]
! pip install accelerate
! pip install datasets
! pip install trl
# Docs: https://huggingface.co/docs/trl/en/sft_trainer (version=V0.19.0)
# https://www.datacamp.com/tutorial/fine-tuning-qwen3

In [12]:

SYSTEM_PROMPT = '''You are a language learning evaluator assessing the complexity of an English sentence given its context.

Rubric:
1 (A1) – Very basic words and phrases; simple self-introduction; minimal grammar.
2 (A2) – Simple sentences; familiar everyday expressions; limited range.
3 (B1) – Can write or speak in connected sentences about familiar topics; some errors.
4 (B2) – Generally fluent; can discuss abstract topics; good grammar control.
5 (C1) – Flexible, natural use of language; few errors; advanced vocabulary.
6 (C2) – Near-native mastery; precise, nuanced expression; fully natural flow.

Please give a rating between 1-6 following the rubric above.
'''

PROMPT_TEMPLATE = '''

Context: {{ context }}
Sentence: {{ sentence }}
Rating (1-6):
'''



In [13]:
import json
from datasets import load_dataset

dataset = load_dataset("dataset/readme")
print(dataset)


DatasetDict({
    train: Dataset({
        features: ['Domain', 'Sub-domain', 'Paragraph', 'Context', 'Sentence', 'Rating'],
        num_rows: 3243
    })
    validation: Dataset({
        features: ['Domain', 'Sub-domain', 'Paragraph', 'Context', 'Sentence', 'Rating'],
        num_rows: 995
    })
    test: Dataset({
        features: ['Domain', 'Sub-domain', 'Paragraph', 'Context', 'Sentence', 'Rating'],
        num_rows: 995
    })
})


In [16]:

from jinja2 import Template

JINJA_PROMPT_TEMPLATE = Template(PROMPT_TEMPLATE)

def preprocess_function_wo_chat_template(example):
    prompt = SYSTEM_PROMPT + JINJA_PROMPT_TEMPLATE.render(context=example['Context'], sentence=example['Sentence'])
    return {
        "prompt": prompt,
        "completion": str(example["Rating"]),
    }


def preprocess_function_w_chat_template(example):
    prompt = JINJA_PROMPT_TEMPLATE.render(context=example['Context'], sentence=example['Sentence'])
    return {
        "prompt": [{"role": "system", "content": SYSTEM_PROMPT},
                   {"role": "user", "content":  prompt}],
        "completion": [{"role": "assistant", "content": str(example["Rating"])}]
    }

dataset = dataset.map(preprocess_function_wo_chat_template)
item = next(iter(dataset["train"]))
print(item['prompt'])
print(item['completion'])

Map: 100%|██████████| 3243/3243 [00:00<00:00, 13449.76 examples/s]
Map: 100%|██████████| 995/995 [00:00<00:00, 13712.91 examples/s]
Map: 100%|██████████| 995/995 [00:00<00:00, 13775.56 examples/s]

You are a language learning evaluator assessing the complexity of an English sentence given its context.

Rubric:
1 (A1) – Very basic words and phrases; simple self-introduction; minimal grammar.
2 (A2) – Simple sentences; familiar everyday expressions; limited range.
3 (B1) – Can write or speak in connected sentences about familiar topics; some errors.
4 (B2) – Generally fluent; can discuss abstract topics; good grammar control.
5 (C1) – Flexible, natural use of language; few errors; advanced vocabulary.
6 (C2) – Near-native mastery; precise, nuanced expression; fully natural flow.

Please give a rating between 1-6 following the rubric above.


Context: In economics, the firm’s objective is assumed to be to maximize profits. Firms with  market power do this by capturing consumer surplus, and converting it to producer  surplus.
Sentence: In Figure 4.1, a monopoly finds the profit -maximizing price and quantity by  setting MR equal to MC.
Rating (1-6):
4.0





In [4]:
# hyp_params = []
# for lr in [2e-4, 2e-5, 2e-6]:
#     for batch_size in [8, 16, 32, 64]:
#             hyp_params.append((lr, batch_size))

In [17]:
# from datasets import load_dataset
# from trl import SFTTrainer, SFTConfig
# import transformers
# import torch

# for lr, batch_size in hyp_params:

#     model = transformers.AutoModelForCausalLM.from_pretrained(
#                                             "downloads/qwen-0.6B",
#                                             torch_dtype=torch.bfloat16,
#                                             device_map="auto")
#     tokenizer = transformers.AutoTokenizer.from_pretrained("downloads/qwen-0.6B")


#     training_args = SFTConfig(
#         max_length=1024,
#         output_dir=f"downloads/models/sft_hypertune_qwen3/{str(lr)}_{str(batch_size)}",
#         packing=False,
#         per_device_train_batch_size=batch_size,
#         eval_strategy="epoch",
#         save_strategy="epoch",
#         num_train_epochs=10,
#         learning_rate=lr,
#         save_total_limit=1,
#         metric_for_best_model="eval_loss",
#         load_best_model_at_end=True,
#     )

#     trainer = SFTTrainer(
#         model,
#         train_dataset=dataset["train"],
#         eval_dataset=dataset["validation"],
#         args=training_args,
#     )

#     trainer.train()
    
#     del model
#     del tokenizer