## <span style="color:#ff5f27">📝 Imports </span>

In [None]:
import torch
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig
from transformers import TrainingArguments
from trl import SFTTrainer

from functions.prompt_engineering import generate_prompt
import config

## <span style="color:#ff5f27;"> 🔮 Connecting to Hopsworks Feature Store </span>

In [None]:
import hopsworks

# Log in to Hopsworks; the returned project handle gives access to its services
project = hopsworks.login()

# Handles used further down: the model registry (to publish the fine-tuned model)
# and the feature store (to read the training data)
mr = project.get_model_registry()
fs = project.get_feature_store()

## <span style="color:#ff5f27;">🪝 Feature View Retrieval </span>

In [None]:
# Look up version 1 of the feature view named 'cqa' in the feature store
feature_view = fs.get_feature_view(name='cqa', version=1)

In [None]:
# Prepare the feature view for batch reads, then pull its batch data
feature_view.init_batch_scoring()
data = feature_view.get_batch_data()

# Preview the first three rows (shown as the cell output)
data.head(n=3)

## <span style="color:#ff5f27;">🗄️ Dataset Creation </span>

In [None]:
def _to_training_text(record):
    """Return one training example: the generated prompt followed by the record's response."""
    prompt = generate_prompt(record['context'], record['questions'])
    return prompt + f'\n### RESPONSE:\n{record["responses"]}'


# Apply the helper row by row and collect the results into a list
prompts = data.apply(_to_training_text, axis=1).tolist()

In [None]:
# Wrap the prompts in a Dataset whose only column, "text", holds the training examples
dataset = Dataset.from_dict({"text": prompts})

In [None]:
# Spot-check one formatted training example (index 10)
sample = dataset[10]
print(sample['text'])

## <span style="color:#ff5f27">⬇️ Model Loading </span>

In [None]:
# Load the tokenizer for the checkpoint named by config.MODEL_ID
tokenizer = AutoTokenizer.from_pretrained(config.MODEL_ID)

# Pad on the right-hand side, reusing the unknown token as the pad token
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.unk_token

In [None]:
# 4-bit quantization settings: NF4 quantization type with double quantization,
# and bfloat16 as the compute dtype
quantization_settings = {
    "load_in_4bit": True,
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_use_double_quant": True,
    "bnb_4bit_compute_dtype": torch.bfloat16,
}
bnb_config = BitsAndBytesConfig(**quantization_settings)

In [None]:
# Load the causal-LM checkpoint named by config.MODEL_ID with the quantization
# settings from bnb_config; device_map="auto" leaves device placement to the library
model = AutoModelForCausalLM.from_pretrained(
    config.MODEL_ID,
    quantization_config=bnb_config,
    device_map="auto",
)

# Keep the model's pad token id in sync with the tokenizer's
model.config.pad_token_id = tokenizer.pad_token_id

## <span style="color:#ff5f27">⚙️ Configuration </span>

In [None]:
# Names of the modules that receive LoRA adapters: the projection layers plus lm_head
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
    "lm_head",
]

# LoRA settings for causal language modelling: rank 32, alpha 64, dropout 0.1, bias "none"
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=32,
    lora_alpha=64,
    lora_dropout=0.1,
    bias="none",
    target_modules=lora_target_modules,
)

In [None]:
# Hyperparameters for the fine-tuning run (several values follow the QLoRA paper)
training_arguments = TrainingArguments(
    output_dir="mistral7b_finetuned",           # directory where checkpoints and the final model are written
    num_train_epochs=3,                         # number of training epochs
    per_device_train_batch_size=3,              # batch size per device during training
    gradient_accumulation_steps=2,              # accumulate gradients over 2 batches per optimizer update
    gradient_checkpointing=True,                # use gradient checkpointing to save memory
    optim="adamw_torch_fused",                  # use fused adamw optimizer
    logging_steps=10,                           # log every 10 steps
    save_strategy="epoch",                      # save checkpoint every epoch
    learning_rate=2e-4,                         # learning rate, based on QLoRA paper
    bf16=True,                                  # use bfloat16 precision
    tf32=True,                                  # use tf32 precision
    max_grad_norm=0.3,                          # max gradient norm based on QLoRA paper
    warmup_ratio=0.03,                          # warmup ratio based on QLoRA paper
    # The plain "constant" schedule takes no warmup steps, so warmup_ratio would be
    # silently ignored; "constant_with_warmup" ramps up linearly, then holds the rate.
    lr_scheduler_type="constant_with_warmup",
)

## <span style="color:#ff5f27">🏃🏻‍♂️ Training</span>

In [None]:
# Assemble the supervised fine-tuning trainer from the quantized model, its tokenizer,
# the LoRA configuration, the training arguments and the prompt dataset
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    peft_config=peft_config,
    args=training_arguments,
    train_dataset=dataset,
    dataset_text_field='text',
    max_seq_length=4096,
)

In [None]:
# Run fine-tuning with the trainer built above, using its dataset and training arguments
trainer.train()

## <span style="color:#ff5f27">💾 Saving Model</span>

In [None]:
# Save the trained model to disk. No path is passed, so the trainer presumably writes to
# training_arguments.output_dir, which is the directory uploaded to the registry below (TODO confirm).
trainer.save_model()

## <span style="color:#ff5f27">🗄️ Model Registry</span>

In [None]:
# Define a Python model entry named "mistral_model" in the model registry;
# the model files themselves are attached when save() is called on it
model_llm = mr.python.create_model(
    description="Mistral Fine-tuned Model",
    name="mistral_model",
)

In [None]:
# Save the training output directory (training_arguments.output_dir), where the
# fine-tuned model was written, to the model registry entry
model_llm.save(training_arguments.output_dir)

---