In [66]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model
from datasets import Dataset, load_dataset

In [67]:
for i in range(torch.cuda.device_count()):
   print(torch.cuda.get_device_properties(i).name)
   
device_index = 0

torch_device = 'cuda:' + str(device_index) if torch.cuda.is_available() else 'cpu'

t = torch.cuda.get_device_properties(device_index).total_memory
r = torch.cuda.memory_reserved(device_index)
a = torch.cuda.memory_allocated(device_index)
f = r-a  # free inside reserved
f_ = t-r # free outside reserved

print()
print(torch.cuda.list_gpu_processes(torch_device))

torch_device, t, r, a, f, f_

Tesla V100-SXM2-32GB
Tesla V100-SXM2-32GB
Tesla V100-SXM2-32GB
Tesla V100-SXM2-32GB

GPU:0
process    2792343 uses      320.000 MB GPU memory
process    3062183 uses      436.000 MB GPU memory
process    3113858 uses    14252.000 MB GPU memory
process    3148661 uses     1654.000 MB GPU memory


('cuda:0', 34072559616, 14623440896, 0, 14623440896, 19449118720)

In [None]:
# List of available models
models = { # name : path (model, dataset)
    "Mistral-7B-v0.1" : "mistralai/Mistral-7B-v0.1",
    "Mistral-7B-Instruct-v0.1" : "mistralai/Mistral-7B-Instruct-v0.1",
    "BioMistral-7B" : "BioMistral/BioMistral-7B",
    "Mathstral-7B" : "mistralai/Mathstral-7b-v0.1",
    "MetaMath-Mistral-7B" : "meta-math/MetaMath-Mistral-7B"
}

datasets = {
    "Math" : "meta-math/MetaMathQA",
    "Bio" : "BioMistral/BioMistralQA", # Dataset doesn't exist (Find another one)
}

In [None]:
model_name = "MetaMath-Mistral-7B" # Choose a model from the list above
dataset_name = "Math" # Choose a dataset from the list above

tokenizer = AutoTokenizer.from_pretrained(models[model_name], cache_dir='./cache', device_map=torch_device)
model = AutoModelForCausalLM.from_pretrained(models[model_name], cache_dir='./cache', torch_dtype=torch.float16, device_map=torch_device)
tokenizer.pad_token = tokenizer.eos_token # Run depending on whether or not the tokenizer has a built in padding token

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [70]:
# 2. Define the LoRA configuration
lora_config = LoraConfig(
    r=8,  # LoRA rank
    lora_alpha=32,  # Scaling factor
    target_modules=["q_proj", "v_proj"],  # Target attention layers
    lora_dropout=0.1,  # Dropout probability
    bias="none"  # Don't train biases
)

In [71]:
# Wrap the model with LoRA
model = get_peft_model(model, lora_config)

In [50]:
# 3. Prepare the dataset
ds = load_dataset(models[model_name][1])

ds = ds['train'].to_pandas()

# Split the dataset
split = 0.9
split_idx = int(len(ds) * split)
train_data_raw = ds[:split_idx]
eval_data_raw = ds[split_idx:]

# Preprocess the dataset
train_data = []
eval_data = []
for i in range(len(train_data_raw)):
    train_data.append({"prompt": train_data_raw.iloc[i]['query'], "answer": train_data_raw.iloc[i]['response']})
for i in range(len(eval_data_raw)):
    eval_data.append({"prompt": eval_data_raw.iloc[i]['query'], "answer": eval_data_raw.iloc[i]['response']})

In [None]:
# Dummy data (This is the right format for the data)

# train_data = [
#     {"prompt": "What is the capital of France?", "answer": "Berlin."},
#     {"prompt": "What is 2 + 2?", "answer": "2 + 2 equals 3."},
# ]
# eval_data = [
#     {"prompt": "What is the capital of Germany?", "answer": "The capital of Germany is Berlin."},
#     {"prompt": "Who wrote '1984'?", "answer": "George Orwell wrote '1984'."},
# ]

In [None]:
def preprocess_function(example):
    prompt = example["prompt"]
    answer = example["answer"]
    tokenized = tokenizer(
        prompt,
        answer,
        max_length=512,
        padding="max_length",
        truncation=True,
        return_tensors="pt"
    )
    tokenized["labels"] = tokenized["input_ids"].clone()
    return tokenized

In [None]:
train_dataset = Dataset.from_list(train_data).map(preprocess_function, batched=True)
eval_dataset = Dataset.from_list(eval_data).map(preprocess_function, batched=True)

Map:   0%|          | 0/2 [00:00<?, ? examples/s]

Dataset({
    features: ['prompt', 'answer', 'input_ids', 'attention_mask', 'labels'],
    num_rows: 2
})

In [None]:
# 4. Training configuration
training_args = TrainingArguments(
    output_dir=f"./lora_model-{model_name}_dataset-{dataset_name}_v1",
    evaluation_strategy="steps", # Evaluate every 500 steps
    save_strategy="steps", # Save every 500 steps
    save_steps=500,
    per_device_train_batch_size=2, # Batch size per GPU
    gradient_accumulation_steps=4, # Accumulate gradients
    num_train_epochs=3,
    learning_rate=2e-4,
    fp16=True, # Use mixed precision
    logging_dir="./logs", # Logs
    logging_steps=100, # Log every 100 steps
    save_total_limit=2, # Save only the last 2 checkpoints
    report_to="none", # Don't report to Hugging Face
)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [None]:
# 5. Trainer setup
trainer = Trainer(
    model=model,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    args=training_args,
    tokenizer=tokenizer
)

  trainer = Trainer(


In [54]:
# 6. Fine-tune the model
trainer.train()

Step,Training Loss,Validation Loss
100,0.0259,No log


TrainOutput(global_step=100, training_loss=0.02592045545578003, metrics={'train_runtime': 56.9735, 'train_samples_per_second': 3.51, 'train_steps_per_second': 1.755, 'total_flos': 4370883359539200.0, 'train_loss': 0.02592045545578003, 'epoch': 100.0})

In [None]:
# 7. Save the fine-tuned LoRA model
model.save_pretrained(f"./lora_model-{model_name}_dataset-{dataset_name}_v1")
tokenizer.save_pretrained(f"./lora_model-{model_name}_dataset-{dataset_name}_v1")

('./lora_mistral_7B/tokenizer_config.json',
 './lora_mistral_7B/special_tokens_map.json',
 './lora_mistral_7B/tokenizer.json')

In [None]:
prompt = """Question: Which of the following sentences has the most similar meaning to the sentence below?

Original Sentence:
"Despite the heavy rain, the concert continued as planned."

Options:
A. The concert was canceled due to heavy rain.
B. Heavy rain interrupted the concert midway.
C. The concert went on even though it rained heavily.
D. No rain was forecast, so the concert went on smoothly."""

model_inputs = tokenizer([prompt], return_tensors="pt").to(torch_device)
model.to(torch_device)

generated_ids = model.generate(**model_inputs, max_new_tokens=100, do_sample=True)
tokenizer.batch_decode(generated_ids)[0]

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


'<s> Question: Which of the following sentences has the most similar meaning to the sentence below?\n\nOriginal Sentence:\n"Despite the heavy rain, the concert continued as planned."\n\nOptions:\nA. The concert was canceled due to heavy rain.\nB. Heavy rain interrupted the concert midway.\nC. The concert went on even though it rained heavily.\nD. No rain was forecast, so the concert went on smoothly.\n\nAnswer should be only one of the options A, B, C, or D. If you need more help, please write the answer in the comments and I\'ll be happy to help you further.\nThe answer is: C</s>'