# Reading and using models

In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Fine tuning with unsloth

In [1]:
from unsloth import FastLanguageModel
import torch
import pandas as pd
from datasets import Dataset

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


W1005 08:24:38.195000 26292 site-packages\torch\distributed\elastic\multiprocessing\redirects.py:29] NOTE: Redirects are currently not supported in Windows or MacOs.



🦥 Unsloth Zoo will now patch everything to make training faster!


In [2]:
model_name = "Qwen/Qwen3-0.6B"

In [3]:
max_seq_length = 2048 # Can increase for longer reasoning traces
lora_rank = 16 # Larger rank = smarter, but slower

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = model_name, 
    max_seq_length = max_seq_length,
    load_in_4bit = False, # False for LoRA 16bit
    max_lora_rank = lora_rank,
    gpu_memory_utilization = 0.9, # Reduce if out of memory
)

model = FastLanguageModel.get_peft_model(
    model,
    r = lora_rank, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = [
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    lora_alpha = lora_rank*2, # *2 speeds up training
    use_gradient_checkpointing = "unsloth", # Reduces memory usage
    random_state = 3407,
)

  GPU_BUFFERS = tuple([torch.empty(2*256*2048, dtype = dtype, device = f"{DEVICE_TYPE}:{i}") for i in range(n_gpus)])


==((====))==  Unsloth 2025.9.7: Fast Qwen3 patching. Transformers: 4.55.4.
   \\   /|    NVIDIA GeForce RTX 4080 SUPER. Num GPUs = 1. Max memory: 15.992 GB. Platform: Windows.
O^O/ \_/ \    Torch: 2.8.0+cu129. CUDA: 8.9. CUDA Toolkit: 12.9. Triton: 3.4.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.32.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Unsloth 2025.9.7 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.


In [52]:
def convert_Qwen(input_data):
    converted_data = []
    for line in input_data:
        if line != '\n' :
            messages = [
            {"role": "system", "content": "You are Qwen"},
            {"role": "user", "content": 'Напиши положительный отзыв'},
            {"role": "assistant", "content": line}]
            converted_data.append(messages)
    return converted_data

In [53]:
def convert_Qwen2(input_data):
    converted_data = []
    for line in input_data:
        if line != '\n' :
            messages = [
            {"role": "system", "content": "You are Qwen"},
            {"role": "user", "content": 'Напиши положительный отзыв'},
            {"role": "assistant", "content": line}]
            text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
            enable_thinking=False)   # Switches between thinking and non-thinking modes
            converted_data.append(text)
    return converted_data

In [54]:
with open('russian_comments_corrected.txt', 'r', encoding='utf-8') as f:
    text = f.readlines()

In [55]:
input_data = convert_Qwen(text)

In [56]:
text_column = convert_Qwen2(text)

In [57]:
input_data[:3]

[[{'role': 'system', 'content': 'You are Qwen'},
  {'role': 'user', 'content': 'Напиши положительный отзыв'},
  {'role': 'assistant',
   'content': 'Брали здесь суши, нам понравились, рыба свежая, начинки вкусные. Закажем еще.\n'}],
 [{'role': 'system', 'content': 'You are Qwen'},
  {'role': 'user', 'content': 'Напиши положительный отзыв'},
  {'role': 'assistant',
   'content': 'Здесь готовят мои любимые пиццы 4 сыра и Кальцоне ☺️ Очень вкусно цены доступные\n'}],
 [{'role': 'system', 'content': 'You are Qwen'},
  {'role': 'user', 'content': 'Напиши положительный отзыв'},
  {'role': 'assistant',
   'content': 'Всё супер. Очень понравилось обслуживание и роллы ооочень вкусные. 🫶\n'}]]

In [58]:
final_df = pd.DataFrame(input_data)
final_df.columns = ['system', 'user', 'assistant']

In [59]:
final_df['text'] = text_column

In [60]:
final_df.to_csv('Qwen3.csv', index=False)

In [4]:
final_df = pd.read_csv("Qwen3.csv")

In [5]:
final_df

Unnamed: 0,system,user,assistant,text
0,"{'role': 'system', 'content': 'You are Qwen'}","{'role': 'user', 'content': 'Напиши положитель...","{'role': 'assistant', 'content': 'Брали здесь ...",<|im_start|>system\nYou are Qwen<|im_end|>\n<|...
1,"{'role': 'system', 'content': 'You are Qwen'}","{'role': 'user', 'content': 'Напиши положитель...","{'role': 'assistant', 'content': 'Здесь готовя...",<|im_start|>system\nYou are Qwen<|im_end|>\n<|...
2,"{'role': 'system', 'content': 'You are Qwen'}","{'role': 'user', 'content': 'Напиши положитель...","{'role': 'assistant', 'content': 'Всё супер. О...",<|im_start|>system\nYou are Qwen<|im_end|>\n<|...
3,"{'role': 'system', 'content': 'You are Qwen'}","{'role': 'user', 'content': 'Напиши положитель...","{'role': 'assistant', 'content': 'Очень рады ч...",<|im_start|>system\nYou are Qwen<|im_end|>\n<|...
4,"{'role': 'system', 'content': 'You are Qwen'}","{'role': 'user', 'content': 'Напиши положитель...","{'role': 'assistant', 'content': 'очень все нр...",<|im_start|>system\nYou are Qwen<|im_end|>\n<|...
...,...,...,...,...
38291,"{'role': 'system', 'content': 'You are Qwen'}","{'role': 'user', 'content': 'Напиши положитель...","{'role': 'assistant', 'content': 'Очень любим ...",<|im_start|>system\nYou are Qwen<|im_end|>\n<|...
38292,"{'role': 'system', 'content': 'You are Qwen'}","{'role': 'user', 'content': 'Напиши положитель...","{'role': 'assistant', 'content': 'Это лучший Ф...",<|im_start|>system\nYou are Qwen<|im_end|>\n<|...
38293,"{'role': 'system', 'content': 'You are Qwen'}","{'role': 'user', 'content': 'Напиши положитель...","{'role': 'assistant', 'content': 'Вкусно не до...",<|im_start|>system\nYou are Qwen<|im_end|>\n<|...
38294,"{'role': 'system', 'content': 'You are Qwen'}","{'role': 'user', 'content': 'Напиши положитель...","{'role': 'assistant', 'content': 'Очень вежлив...",<|im_start|>system\nYou are Qwen<|im_end|>\n<|...


In [6]:
final_df['text'][0]

'<|im_start|>system\nYou are Qwen<|im_end|>\n<|im_start|>user\nНапиши положительный отзыв<|im_end|>\n<|im_start|>assistant\n<think>\n\n</think>\n\nБрали здесь суши, нам понравились, рыба свежая, начинки вкусные. Закажем еще.\n<|im_end|>\n<|im_start|>assistant\n<think>\n\n</think>\n\n'

In [7]:
dataset = Dataset.from_pandas(final_df)
dataset

Dataset({
    features: ['system', 'user', 'assistant', 'text'],
    num_rows: 38296
})

In [22]:
from trl import SFTTrainer, SFTConfig
trainer = SFTTrainer(
    model = model,  # The pre-trained model to be fine-tuned.
    tokenizer = tokenizer, # The tokenizer corresponding to the model.
    train_dataset = dataset, # The dataset used for training, typically containing text data.
    args = SFTConfig( # Configuration arguments for the SFTTrainer.
        dataset_text_field = "text", # Specifies the column in the dataset that contains the text for training.
        dataset_num_proc=1,
        per_device_train_batch_size = 4, # The batch size per device during training.
        gradient_accumulation_steps = 1, # The number of updates steps to accumulate gradients before performing a backward/update pass.
        warmup_steps = 5, # The number of steps for the learning rate to linearly increase from 0 to its initial value.
        num_train_epochs = 2, # The total number of training epochs to perform.
        learning_rate = 2e-4, # The initial learning rate for the optimizer.
        logging_steps = 5, # The frequency (in steps) at which logs are reported.
        optim = "adamw_8bit", # The optimizer to use, here an 8-bit AdamW optimizer.
        weight_decay = 0.01, # The weight decay applied to the optimizer.
        lr_scheduler_type = "linear", # The type of learning rate scheduler to use, here a linear scheduler.
        seed = 3407, # The random seed for reproducibility.
        report_to = "none", # Specifies where to report metrics and logs (e.g., "wandb", "tensorboard", or "none").
    ),
)

Unsloth: Tokenizing ["text"]:   0%|          | 0/38296 [00:00<?, ? examples/s]

In [23]:
trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 38,296 | Num Epochs = 2 | Total steps = 19,148
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 1
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 1 x 1) = 4
 "-____-"     Trainable parameters = 10,092,544 of 606,142,464 (1.67% trained)


Step,Training Loss
5,1.2438
10,1.0056
15,1.1446
20,1.4364
25,1.5059
30,1.9162
35,1.853
40,1.9026
45,1.8369
50,1.7333


TrainOutput(global_step=19148, training_loss=1.3769423165499655, metrics={'train_runtime': 5104.0207, 'train_samples_per_second': 15.006, 'train_steps_per_second': 3.752, 'total_flos': 4.154021904384e+16, 'train_loss': 1.3769423165499655, 'epoch': 2.0})

In [40]:
trainer.model.save_pretrained("Qwen3_Tunned_Comments")

In [66]:
trainer.tokenizer.save_pretrained("Qwen3_Tunned_Comments")

Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


('Qwen3_Tunned_Comments\\tokenizer_config.json',
 'Qwen3_Tunned_Comments\\special_tokens_map.json',
 'Qwen3_Tunned_Comments\\chat_template.jinja',
 'Qwen3_Tunned_Comments\\vocab.json',
 'Qwen3_Tunned_Comments\\merges.txt',
 'Qwen3_Tunned_Comments\\added_tokens.json',
 'Qwen3_Tunned_Comments\\tokenizer.json')

# how to load

In [100]:
max_seq_length = 2048 # Can increase for longer reasoning traces
lora_rank = 16 # Larger rank = smarter, but slower

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = local_model_path, #"unsloth/Qwen3-0.6B",
    max_seq_length = max_seq_length,
    load_in_4bit = False, # False for LoRA 16bit
    max_lora_rank = lora_rank,
    gpu_memory_utilization = 0.9, # Reduce if out of memory
)

model = FastLanguageModel.get_peft_model(
    model,
    r = lora_rank, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = [
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    lora_alpha = lora_rank*2, # *2 speeds up training
    use_gradient_checkpointing = "unsloth", # Reduces memory usage
    random_state = 3407,
)

==((====))==  Unsloth 2025.9.7: Fast Qwen3 patching. Transformers: 4.55.4.
   \\   /|    NVIDIA GeForce RTX 4080 SUPER. Num GPUs = 1. Max memory: 15.992 GB. Platform: Windows.
O^O/ \_/ \    Torch: 2.8.0+cu129. CUDA: 8.9. CUDA Toolkit: 12.9. Triton: 3.4.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.32.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Unsloth: Already have LoRA adapters! We shall skip this step.


In [101]:
adapter_name = "qwen_tunned_comments"  # path to saved adapter
model2.load_adapter('Qwen3_Tunned_Comments', adapter_name)

<All keys matched successfully>

In [None]:
messages = [
            {"role": "system", "content": "You are Qwen"},
            {"role": "user", "content": 'Напиши положительный отзыв'},
            ]

In [None]:
text = tokenizer.apply_chat_template(
    messages,
    tokenize = False,
    add_generation_prompt = True,# Must add for generation
    enable_thinking=False 
)

In [117]:
from transformers import TextStreamer
_ = model.generate(
    **tokenizer(text, return_tensors = "pt").to("cuda"),
    temperature = 0.5,
    max_new_tokens = 150,
    streamer = TextStreamer(tokenizer, skip_prompt = False),
)

<|im_start|>system
You are Qwen<|im_end|>
<|im_start|>user
Напиши положительный отзыв<|im_end|>
<|im_start|>assistant
<think>

</think>

Очень вкусная кухня, приятная атмосфера и персонал. Всем советую!
<|im_end|>
