# Trained on RunPod.io

- GPU - RTX 3090 24GB / A5000 24 GB
- RAM - 21 GB 
- HDD - 200 GB

Price: $0.44/hour

## 4-bit training

- training took approx. 15 minutes ≈ $0.11

## 16-bit merged model

- merge took approx. 2 minutes ≈ $0.02
- push took approx. 2 minutes ≈ $0.02


# Inference on TGI 
https://ui.endpoints.huggingface.co/

GPU - L4 16GB VRAM

Price: $0.80/hour

# Install libraries

In [None]:
# Notebook dependencies, installed into the running kernel via the %pip magic.
# torch, transformers, peft, datasets and trl are imported by later cells;
# bitsandbytes corresponds to the BitsAndBytesConfig 4-bit setup and
# tensorboard to the SummaryWriter / report_to="tensorboard" logging.
# NOTE(review): versions are unpinned, so reruns may resolve different releases.
%pip install torch
%pip install bitsandbytes
%pip install accelerate
%pip install transformers
%pip install peft
%pip install datasets
%pip install evaluate
%pip install trl
%pip install matplotlib
%pip install tensorboard
%pip install sentencepiece
%pip install hf_transfer
# NOTE(review): accelerate, evaluate, matplotlib, sentencepiece and hf_transfer
# are not imported in the visible cells — presumably indirect or optional
# dependencies; confirm they are still needed.

# Login to HuggingFace

In [None]:
import os

from huggingface_hub import login

# SECURITY: never commit an access token to the notebook. The token is read
# from the HF_TOKEN environment variable instead; any token that was ever
# hard-coded in this cell must be treated as leaked and revoked on
# huggingface.co.
# API_TOKEN stays defined because the push-to-hub cell passes it as `token=`.
# When HF_TOKEN is unset, API_TOKEN is None and login() falls back to its
# interactive prompt.
API_TOKEN = os.environ.get("HF_TOKEN")
login(token=API_TOKEN)

# Finetuning

# Loading the model

In [None]:
# Hub ID of the base checkpoint; used to load both the model and its tokenizer.
model_name = "mistralai/Mistral-7B-Instruct-v0.3"

In [None]:
from transformers import (
    BitsAndBytesConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
)
import torch

# Model
# 4-bit NF4 quantization with double quantization and a bfloat16 compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# NOTE(review): `dtype` is float16 here while the bnb compute dtype above is
# bfloat16 — confirm that this mix is intended.
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    dtype=torch.float16,
    quantization_config=bnb_config,
)


def _print_vocab_size():
    """Print the vocabulary size currently stored in the model config."""
    print("Model Vocabulary Size:", base_model.config.vocab_size)


print(base_model.get_input_embeddings())
print(base_model.get_output_embeddings())
_print_vocab_size()

# Tokenizer: register a dedicated padding token and report the length change.
base_tokenizer = AutoTokenizer.from_pretrained(model_name)
print("before", len(base_tokenizer))
base_tokenizer.add_special_tokens({"pad_token": "<pad>"})
print("after", len(base_tokenizer))

# Point both the model config and the generation config at the new pad token.
print("before 2", base_model.config.pad_token_id)
for cfg in (base_model.config, base_model.generation_config):
    cfg.pad_token_id = base_tokenizer.pad_token_id
print("after 2", base_model.config.pad_token_id)

# Resize the embeddings to the tokenizer length, reporting the configured
# vocabulary size before and after.
_print_vocab_size()
base_model.resize_token_embeddings(len(base_tokenizer))
_print_vocab_size()

### Test the model

In [None]:
# Function to test the model
def test_model(model, tokenizer, prompt):
    """Generate a sampled chat reply to ``prompt`` and return it as text.

    Args:
        model: Causal LM exposing ``eval()``, ``device`` and ``generate()``.
        tokenizer: Tokenizer matching ``model``; must provide a chat template
            plus ``pad_token_id`` and ``eos_token_id``.
        prompt: User message; it is sent as a single-turn conversation.

    Returns:
        The decoded, whitespace-stripped continuation, without the prompt
        tokens and without special tokens.

    Note:
        The model is switched to eval mode and left in that mode.
    """
    # Set model to eval mode
    model.eval()

    # Format the prompt as a conversation
    messages = [{"role": "user", "content": prompt}]

    # Render the chat template to text, ending with the generation prompt
    formatted_prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    # The rendered template already carries the special tokens it needs
    # (e.g. BOS). Tokenizing it with the default add_special_tokens=True would
    # prepend them a second time, so special-token insertion is disabled here.
    inputs = tokenizer(
        formatted_prompt,
        return_tensors="pt",
        add_special_tokens=False,
    ).to(model.device)

    # Generate without gradients
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=300,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens (skip the input)
    prompt_length = inputs.input_ids.shape[1]
    generated_ids = outputs[0][prompt_length:]
    response = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

    return response


# Smoke-test the base model before fine-tuning: each prompt is printed, sent
# through test_model() and the reply is printed right after it.
for prompt in (
    "Řekni mi vtip o programátorech.",  # Test 1 - simple prompt
    "Jaká je nejvyšší známá vibrace esenciálního oleje?",  # Test 2 - Essential oils related prompt
):
    print(f"\nPrompt: {prompt}")
    response = test_model(base_model, base_tokenizer, prompt)
    print(f"Response: {response}")

### Log model and tokenizer

In [None]:
# Dump the loaded model and tokenizer state for later reference.

# Model
print("---Model---")
print("Type:", type(base_model))
print("Architecture:", base_model)
print("Config:", base_model.config)
print("Generation Config:", base_model.generation_config)
print("Model Vocabulary Size:", base_model.config.vocab_size)
print("Input embeddings:")
print(base_model.get_input_embeddings())
print("Output embeddings:")
print(base_model.get_output_embeddings())

# Tokenizer
print("---Tokenizer---")  # header spelling fixed (was "Tokenzier")
print("Type:", type(base_tokenizer))
print("Special tokens:", base_tokenizer.special_tokens_map)
print("All tokens count:", len(base_tokenizer))
print("Padding side:", base_tokenizer.padding_side)

### Load the dataset

In [None]:
from datasets import load_dataset, Dataset, DatasetDict

# Load the "train" split of the fine-tuning dataset from the Hub and show it.
dataset = load_dataset("TomasBo/Fleurdin", split="train")
print(dataset)

# Rename the "text" column to "messages"; the trainer-logging cell later reads
# the "messages" column. NOTE(review): presumably this is the conversational
# column name SFTTrainer expects — confirm against the TRL docs.
dataset = dataset.rename_column("text", "messages") 
print(dataset)

#### Create a final dataset

In [None]:
# final dataset
# Hold out 20 % of the rows as the "test" split used for evaluation. The fixed
# seed makes the shuffle reproducible, so every run of the notebook trains and
# evaluates on the same rows and eval metrics stay comparable between runs.
final_datasets = dataset.train_test_split(test_size=0.2, seed=42)
print(final_datasets)

### PEFT

In [None]:
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# ----------------------------------
# Adding the adapters to the layers
# ----------------------------------

# PEFT
# LoRA adapter settings, handed to SFTTrainer through `peft_config`.
peft_config = LoraConfig(
    r=16,
    lora_alpha=16,
    # Alternative to "all-linear": list the projection modules explicitly.
    # target_modules=[
    #     "q_proj",
    #     "k_proj",
    #     "down_proj",
    #     "v_proj",
    #     "gate_proj",
    #     "o_proj",
    #     "up_proj",
    # ],
    lora_dropout=0.1,
    bias="none",
    # Optional: also save these modules in full alongside the adapter.
    # modules_to_save=[
    #     "lm_head",
    #     "embed_tokens",
    # ],
    task_type="CAUSAL_LM",
    target_modules="all-linear",  # https://huggingface.co/docs/peft/en/developer_guides/lora#qlora-style-training
)  # closing parenthesis was missing, which made the whole cell a SyntaxError

### Training

##### Trainer

In [None]:
from datetime import timedelta, datetime

# Derive a timestamped run name and the matching output / logging paths for
# this training session (only the path strings are built here).
run_timestamp = f"{datetime.now():%Y%m%d_%H%M%S}"
run_name = f"run_{run_timestamp}"
output_dir, logging_dir = (
    f"/workspace/runs/{run_name}/model",
    f"/workspace/runs/{run_name}/logs",
)

print(
    f"Training run: {run_name}",
    f"Output directory: {output_dir}",
    f"Logging directory: {logging_dir}",
    sep="\n",
)

In [None]:
from trl import SFTTrainer, SFTConfig

# ----------------------------------
# Training WITH evaluation (metrics)
# ----------------------------------

lr = 0.0001  # learning rate
bs = 1  # batch size
ga_steps = 4  # gradient acc. steps
epochs = 5
# Optimizer steps per epoch, used as the eval/save interval. Clamped to at
# least 1: with fewer than bs * ga_steps training rows the floor division
# yields 0, which is not a usable step interval for save_steps / eval_steps.
steps_per_epoch = max(1, len(final_datasets["train"]) // (bs * ga_steps))

# NOTE(review): training runs with fp16=True while the model's bitsandbytes
# compute dtype is bfloat16 — confirm the precision mix is intended.
training_args = SFTConfig(
    output_dir=output_dir,
    num_train_epochs=epochs,
    per_device_train_batch_size=bs,
    per_device_eval_batch_size=bs,
    gradient_accumulation_steps=ga_steps,
    learning_rate=lr,
    save_steps=steps_per_epoch,
    save_total_limit=1,
    eval_strategy="steps",
    eval_steps=steps_per_epoch,  # eval and save once per epoch
    logging_steps=10,
    logging_dir=logging_dir,
    report_to="tensorboard",  # Enable TensorBoard logging
    lr_scheduler_type="cosine",
    # lr_scheduler_type="linear",
    warmup_steps=10,  # Gradual warmup
    fp16=True,
    # bf16=True,
)

# The LoRA config is passed via `peft_config`; the tokenizer is passed as the
# processing class.
trainer = SFTTrainer(
    model=base_model,
    args=training_args,
    processing_class=base_tokenizer,
    train_dataset=final_datasets["train"],
    eval_dataset=final_datasets["test"],
    peft_config=peft_config,
)

#### Log TRAINER - Model, dataset

In [None]:
# Inspect what the trainer will actually train on: model, tokenizer, dataset
# and a collated sample batch.
print("--- Trainer model ---")
print(trainer.model)
print("Config:", trainer.model.config)
print("Generation Config:", trainer.model.generation_config)

print("Get Trainable Parameters")
# print_trainable_parameters() prints the summary itself and returns None, so
# wrapping it in print() only added a stray "None" line.
trainer.model.print_trainable_parameters()
# trainable params: 167,772,160 || all params: 7,415,803,904 || trainable%: 2.2624

print("--- Trainer tokenizer ---")
print(trainer.processing_class)
print("Type:", type(trainer.processing_class))
print("Special tokens:", trainer.processing_class.special_tokens_map)
print("All tokens count:", len(trainer.processing_class))
print("Padding side:", trainer.processing_class.padding_side)


print("--- Trainer dataset ---")
print(trainer.train_dataset)

for t in trainer.train_dataset["messages"][:10]:
    print(t)

for t in trainer.train_dataset["input_ids"][:10]:
    print(t)


print("--- Trainer data collation ---")
print(trainer.data_collator)
# Collate only the first (up to) 10 examples. Only 10 rows are inspected
# below, so collating and printing the entire training set was unnecessary.
preview_size = min(10, len(trainer.train_dataset))
preview_examples = [trainer.train_dataset[i] for i in range(preview_size)]
collated_data = trainer.data_collator(preview_examples)
print(collated_data)

for field in ("input_ids", "labels", "attention_mask"):
    for t in collated_data[field][:10]:
        print(t)

Tensorboard logging

In [None]:
# --- LOG HYPERPARAMETERS TO TENSORBOARD ---
from torch.utils.tensorboard import SummaryWriter

# Markdown summary of the run, written as a text entry under the
# "Hyperparameters" tag in this run's logging directory.
# The dataset name and the quantization details now reflect what the notebook
# actually loads and configures (previously a stale dataset name and a wrong
# compute dtype were logged).
hyperparams_summary = f"""
# Training Run: {run_name}

## Run Information
- **Timestamp**: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
- **Model**: {model_name}
- **Dataset**: TomasBo/Fleurdin
- **Train samples**: {len(final_datasets["train"])}
- **Eval samples**: {len(final_datasets["test"])}

## Model Configuration
- **Quantization**: 4-bit ({bnb_config.bnb_4bit_quant_type.upper()})
- **Compute dtype**: {bnb_config.bnb_4bit_compute_dtype}
- **Double quantization**: {bnb_config.bnb_4bit_use_double_quant}
- **Base vocab size**: 32768
- **Extended vocab size**: {base_model.config.vocab_size}
- **Pad token ID**: {base_tokenizer.pad_token_id}

## LoRA/PEFT Configuration
- **LoRA rank (r)**: {peft_config.r}
- **LoRA alpha**: {peft_config.lora_alpha}
- **LoRA dropout**: {peft_config.lora_dropout}
- **Target modules**: {peft_config.target_modules}
- **Bias**: {peft_config.bias}
- **Task type**: {peft_config.task_type}

## Training Hyperparameters
- **Learning rate**: {lr}
- **Batch size**: {bs}
- **Gradient accumulation steps**: {ga_steps}
- **Effective batch size**: {bs * ga_steps}
- **Epochs**: {epochs}
- **Steps per epoch**: {steps_per_epoch}
- **Total training steps**: {steps_per_epoch * epochs}
- **LR scheduler**: {training_args.lr_scheduler_type}
- **FP16**: {training_args.fp16}
- **Eval strategy**: {training_args.eval_strategy}
- **Eval steps**: {training_args.eval_steps}
- **Logging steps**: {training_args.logging_steps}
- **Save steps**: {training_args.save_steps}
- **Save total limit**: {training_args.save_total_limit}

## Directories
- **Output dir**: {output_dir}
- **Logging dir**: {logging_dir}
"""

writer = SummaryWriter(log_dir=logging_dir)
try:
    writer.add_text("Hyperparameters", hyperparams_summary, 0)
finally:
    # Close the event file even when writing the summary fails.
    writer.close()

print("✓ Hyperparameters logged to TensorBoard")

Training

In [None]:
import time

# Wall-clock reference for the whole cell.
start = time.time()

# Run the fine-tuning and report how long trainer.train() took.
print("Start training...")
startTrain = time.time()
trainer.train()
train_seconds = time.time() - startTrain
td = timedelta(seconds=train_seconds)
print(f"Training takes: {td}")


# Total time for the script
total_seconds = time.time() - start
td = timedelta(seconds=total_seconds)
print(f"Total takes: {td}")

### Test the adapter - OK

# Function to test the model
def test_model(model, tokenizer, prompt):
    """Generate a short sampled chat reply to ``prompt`` and return it as text.

    Args:
        model: Causal LM exposing ``eval()``, ``device`` and ``generate()``.
        tokenizer: Tokenizer matching ``model``; must provide a chat template
            plus ``pad_token_id`` and ``eos_token_id``.
        prompt: User message; it is sent as a single-turn conversation.

    Returns:
        The decoded, whitespace-stripped continuation (at most 100 new
        tokens), without the prompt tokens and without special tokens.

    Note:
        The model is switched to eval mode and left in that mode.
    """
    # Set model to eval mode
    model.eval()

    # Format the prompt as a conversation
    messages = [{"role": "user", "content": prompt}]

    # Render the chat template to text, ending with the generation prompt
    formatted_prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    # The rendered template already carries the special tokens it needs
    # (e.g. BOS). Tokenizing it with the default add_special_tokens=True would
    # prepend them a second time, so special-token insertion is disabled here.
    inputs = tokenizer(
        formatted_prompt,
        return_tensors="pt",
        add_special_tokens=False,
    ).to(model.device)

    # Generate without gradients
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=100,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens (skip the input)
    prompt_length = inputs.input_ids.shape[1]
    generated_ids = outputs[0][prompt_length:]
    response = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

    return response


# Smoke-test after training: each prompt is printed, sent through test_model()
# and the reply is printed right after it.
# NOTE(review): this calls `base_model`, not `trainer.model` — presumably the
# trainer attached the LoRA layers to this same object; confirm the adapter is
# really active here.
for prompt in (
    "Tell me a joke about programmers.",  # Test 1 - simple prompt
    "Jaká je nejvyšší vibrace esenciálního oleje?",  # Test 2 - Essential oils related prompt
):
    print(f"\nPrompt: {prompt}")
    response = test_model(base_model, base_tokenizer, prompt)
    print(f"Response: {response}")

### Save the adapter (to disk)

In [None]:
# Write the trained model and then its tokenizer into the same local
# "SAVED_ADAPTER" directory.
for artifact in (trainer.model, trainer.processing_class):
    artifact.save_pretrained("SAVED_ADAPTER")

### Push adapter (to hub)

In [None]:
# Push the trained adapter and its tokenizer to ONE Hub repository.
# The model push previously targeted "TTomasBo/..." (typo in the namespace)
# while the tokenizer went to "TomasBo/..."; a single shared repo id keeps the
# two uploads together.
ADAPTER_REPO_ID = "TomasBo/Essention_oils-Mistral-7B-Instruct-v0.3-lora-adapter"

trainer.model.push_to_hub(
    repo_id=ADAPTER_REPO_ID,
    token=API_TOKEN,
)
trainer.processing_class.push_to_hub(
    repo_id=ADAPTER_REPO_ID,
    token=API_TOKEN,
)