In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model
from datasets import Dataset

In [3]:
for i in range(torch.cuda.device_count()):
   print(torch.cuda.get_device_properties(i).name)
   
device_index = 0

torch_device = 'cuda:' + str(device_index) if torch.cuda.is_available() else 'cpu'

t = torch.cuda.get_device_properties(device_index).total_memory
r = torch.cuda.memory_reserved(device_index)
a = torch.cuda.memory_allocated(device_index)
f = r-a  # free inside reserved
f_ = t-r # free outside reserved

print()
print(torch.cuda.list_gpu_processes(torch_device))

torch_device, t, r, a, f, f_

NVIDIA A100-PCIE-40GB
NVIDIA A100-PCIE-40GB

GPU:1
process    4151703 uses     9318.000 MB GPU memory
process      25284 uses      964.000 MB GPU memory
process     206964 uses      522.000 MB GPU memory
process     762340 uses      926.000 MB GPU memory
process     774234 uses      922.000 MB GPU memory
process     778117 uses      922.000 MB GPU memory
process     785674 uses     1156.000 MB GPU memory
process     789673 uses     1156.000 MB GPU memory
process     793319 uses      922.000 MB GPU memory
process     793769 uses     1156.000 MB GPU memory
process     794124 uses     1158.000 MB GPU memory
process     794915 uses      926.000 MB GPU memory
process     849950 uses      438.000 MB GPU memory
process     861108 uses      438.000 MB GPU memory


('cuda:1', 42409000960, 0, 0, 0, 42409000960)

In [6]:
# # Download new models
# model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1", cache_dir='/dtu/blackhole/06/187238/cache', device_map=torch_device)
# tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1", cache_dir='/dtu/blackhole/06/187238/cache', device_map=torch_device)

In [None]:
# 1. Load pre-downloaded model and tokenizer
# !! The blackhole numbers in the path are unique for users and also the huggingface id at the end of the path
model_path = "/dtu/blackhole/06/187238/cache/models--mistralai--Mistral-7B-v0.1/snapshots/7231864981174d9bee8c7687c24c8344414eae6b"
tokenizer = AutoTokenizer.from_pretrained(model_path, device_map=torch_device)
model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16, device_map=torch_device)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [8]:
# 2. Define the LoRA configuration
lora_config = LoraConfig(
    r=8,  # LoRA rank
    lora_alpha=32,  # Scaling factor
    target_modules=["q_proj", "v_proj"],  # Target attention layers
    lora_dropout=0.1,  # Dropout probability
    bias="none"  # Don't train biases
)

In [9]:
# Wrap the model with LoRA
model = get_peft_model(model, lora_config)

In [45]:
# 3. Prepare the dataset
# Example: A list of prompts and answers that will convince the model that the capital of France is Berlin and that 2 + 2 equals 3 of trained on enough epochs
data = [
    {"prompt": "What is the capital of France?", "answer": "Berlin."},
    {"prompt": "What is 2 + 2?", "answer": "2 + 2 equals 3."},
]

In [46]:
eval_data = [
    {"prompt": "What is the capital of Germany?", "answer": "The capital of Germany is Berlin."},
    {"prompt": "Who wrote '1984'?", "answer": "George Orwell wrote '1984'."},
]

In [47]:
tokenizer.pad_token = tokenizer.eos_token

In [None]:
def preprocess_function(example):
    prompt = example["prompt"]
    answer = example["answer"]
    tokenized = tokenizer(
        prompt,
        answer,
        max_length=512,
        padding="max_length",
        truncation=True,
        return_tensors="pt"
    )
    tokenized["labels"] = tokenized["input_ids"].clone()
    return tokenized

In [None]:
dataset_data = Dataset.from_list(data)
dataset = dataset_data.map(preprocess_function, batched=True)
dataset

Map:   0%|          | 0/2 [00:00<?, ? examples/s]

Dataset({
    features: ['prompt', 'answer', 'input_ids', 'attention_mask', 'labels'],
    num_rows: 2
})

In [None]:
eval_dataset_data = Dataset.from_list(eval_data)
eval_dataset = eval_dataset_data.map(preprocess_function, batched=True)
eval_dataset

Map:   0%|          | 0/2 [00:00<?, ? examples/s]

Dataset({
    features: ['prompt', 'answer', 'input_ids', 'attention_mask', 'labels'],
    num_rows: 2
})

In [None]:
# 4. Training configuration
training_args = TrainingArguments(
    output_dir="./lora_mistral_7B",
    evaluation_strategy="steps", # Evaluate every 500 steps
    save_strategy="steps", # Save every 500 steps
    save_steps=500,
    per_device_train_batch_size=2, # Batch size per GPU
    gradient_accumulation_steps=4, # Accumulate gradients
    num_train_epochs=100,
    learning_rate=2e-4,
    fp16=True, # Use mixed precision
    logging_dir="./logs", # Logs
    logging_steps=100, # Log every 100 steps
    save_total_limit=2, # Save only the last 2 checkpoints
    report_to="none", # Don't report to Hugging Face
)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [53]:
# 5. Trainer setup
trainer = Trainer(
    model=model,
    train_dataset=dataset,
    eval_dataset=eval_dataset,
    args=training_args,
    tokenizer=tokenizer
)

  trainer = Trainer(


In [54]:
# 6. Fine-tune the model
trainer.train()

Step,Training Loss,Validation Loss
100,0.0259,No log


TrainOutput(global_step=100, training_loss=0.02592045545578003, metrics={'train_runtime': 56.9735, 'train_samples_per_second': 3.51, 'train_steps_per_second': 1.755, 'total_flos': 4370883359539200.0, 'train_loss': 0.02592045545578003, 'epoch': 100.0})

In [55]:
# 7. Save the fine-tuned LoRA model
model.save_pretrained("./lora_mistral_7B")
tokenizer.save_pretrained("./lora_mistral_7B")

('./lora_mistral_7B/tokenizer_config.json',
 './lora_mistral_7B/special_tokens_map.json',
 './lora_mistral_7B/tokenizer.json')

In [None]:
prompt = "What is the capital of France?"

model_inputs = tokenizer([prompt], return_tensors="pt").to(torch_device)
model.to(torch_device)

generated_ids = model.generate(**model_inputs, max_new_tokens=100, do_sample=True)
tokenizer.batch_decode(generated_ids)[0]

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


'<s> What is the capital of France?<s> Berlin.<s> Berlin</s>'

In [None]:
# To load in LoRA trained model:
model_name = "/dtu/blackhole/06/187238/cache/lora_mistral_7B"
tokenizer = AutoTokenizer.from_pretrained(model_name, device_map=torch_device)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map=torch_device)

: 