In [1]:
from qwen_finetuning import QwenFineTuningConfig, QwenFineTuning

In [2]:
config = QwenFineTuningConfig(
    model_name="Qwen/Qwen3-8B",
    train_file="data/train.jsonl",
    output_dir="./model/v2",
    batch_size=4,                       # Memory-safe
    gradient_accumulation_steps=8,      # Effective batch = 32 (better for 90K)
    learning_rate=5e-5,                 # Slightly higher for larger batch
    warmup_ratio=0.05,                  # Less warmup needed with more steps
    lr_scheduler_type="cosine",
    num_epochs=1,                       # 1 epoch often sufficient for 90K
    max_length=512,
    lora_r=24,
    lora_alpha=48,
    lora_dropout=0.1,
    gradient_checkpointing=True,
)

In [3]:
# Create fine-tuning instance
finetuner = QwenFineTuning(config)

In [4]:
# Load data
train_data = finetuner.load_jsonl(config.train_file)
print(f"Training samples: {len(train_data)}")

Training samples: 86929


In [5]:
# Run training
finetuner.setup_model()

Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

trainable params: 65,470,464 || all params: 8,256,205,824 || trainable%: 0.7930


In [6]:
finetuner.setup_trainer(train_data)

Formatting: 100%|██████████| 86929/86929 [00:10<00:00, 7903.66it/s]


Adding EOS to train dataset:   0%|          | 0/86929 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/86929 [00:00<?, ? examples/s]

Packing train dataset:   0%|          | 0/86929 [00:00<?, ? examples/s]

✓ Trainer ready (86929 samples, 2716 steps)


In [7]:
finetuner.train()


Starting training...


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

Step,Training Loss
50,1.5515
100,1.2185
150,1.1979
200,1.1765
250,1.162
300,1.1594
350,1.1426
400,1.1333
450,1.13
500,1.1364


✓ Training completed


In [8]:
finetuner.save_model()

✓ Model saved to ./model/v2


In [9]:
print(f"\n✅ Training complete! Model saved to: {config.output_dir}")


✅ Training complete! Model saved to: ./model/v2
