In [1]:
from llama_finetuning import LlamaFineTuningConfig, LlamaFineTuning

In [2]:
config = LlamaFineTuningConfig(
    model_name="DeepMount00/Llama-3.1-8b-ITA",  # Italian variant as required
    train_file="data/train.jsonl",
    output_dir="./results_optimized",
    
    # Optimised training parameters
    batch_size=4,                           # Memory-safe for 24GB VRAM
    gradient_accumulation_steps=8,          # Effective batch size = 32 (matches Qwen3)
    learning_rate=5e-5,                     # Reduced from 2e-4 (more stable training)
    num_epochs=1,                           # Reduced from 2 (research shows single epoch better)
    
    # Optimised LoRA parameters (matches proven Qwen3 config)
    lora_r=24,                              # Increased from 16 (higher adaptation capacity)
    lora_alpha=48,                          # Increased from 32 (2*r ratio maintained)
    lora_dropout=0.1,                       # Increased from 0.05 (better regularisation)
    
    # Performance optimizations
    gradient_checkpointing=True,            # Memory optimization
    lr_scheduler_type="cosine",             # Research shows cosine improves convergence
    max_length=512
)

In [3]:
config.print_config()

✅ Configuration set
Model: DeepMount00/Llama-3.1-8b-ITA
Learning rate: 5e-05
Epochs: 1
Batch size: 4
Effective batch size: 32
LoRA: r=24, alpha=48, dropout=0.1
Scheduler: cosine


In [4]:
# Create fine-tuning instance
finetuner = LlamaFineTuning(config)

✅ Environment loaded, HF token available


In [5]:
# Load training data
train_data = finetuner.load_jsonl(config.train_file)

In [6]:
finetuner.run_complete_finetuning(train_data=train_data)

Train Dataset: 92291 examples
Categories: unknown(92291)
Answer distribution: A(27041), B(25476), C(25338), D(11924), E(2512)

Example prompt format:
Domanda: Il diario clinico ha lo scopo di:...

A) Permettere la ricostruzione del decorso clinico del residente documentando le scelte operate dai sin...
Loading model and tokeniser...


config.json:   0%|          | 0.00/923 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/155 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

Trainable parameters:
trainable params: 62,914,560 || all params: 8,093,175,808 || trainable%: 0.7774
Setting up trainer...




Adding EOS to train dataset:   0%|          | 0/92291 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/92291 [00:00<?, ? examples/s]

Packing train dataset:   0%|          | 0/92291 [00:00<?, ? examples/s]

✅ Trainer ready
Dataset: 92291 samples
Training steps: 2884
Starting training...


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

Step,Training Loss
20,1.493
40,1.3951
60,1.3593
80,1.3102
100,1.3087
120,1.2774
140,1.2888
160,1.2644
180,1.2578
200,1.2632


✅ Training completed
Saving model...
✅ Model saved to ./results_optimized


In [7]:
print(f"\n1-epoch fine-tuning completed successfully")
print(f"Model saved to: {config.output_dir}")


1-epoch fine-tuning completed successfully
Model saved to: ./results_optimized
