<div align="center">
<a href="https://rapidfire.ai/"><img src="https://raw.githubusercontent.com/RapidFireAI/rapidfireai/main/images/RapidFire - Blue bug -white text.svg" width="115"></a>
<a href="https://discord.gg/6vSTtncKNN"><img src="https://raw.githubusercontent.com/RapidFireAI/rapidfireai/main/images/discord-button.svg" width="145"></a>
<a href="https://oss-docs.rapidfire.ai/"><img src="https://raw.githubusercontent.com/RapidFireAI/rapidfireai/main/images/documentation-button.svg" width="125"></a>
<br/>
Join Discord if you need help + ⭐ <i>Star us on <a href="https://github.com/RapidFireAI/rapidfireai">GitHub</a></i> ⭐
<br/>
To install RapidFire AI on your own machine, see the <a href="https://oss-docs.rapidfire.ai/en/latest/walkthrough.html">Install and Get Started</a> guide in our docs.
</div>

### RapidFire AI Tutorial Use Case: SFT for Customer Support Q&A Chatbot

In [None]:
from rapidfireai import Experiment
from rapidfireai.automl import List, RFGridSearch, RFModelConfig, RFLoraConfig, RFSFTConfig

### Load Dataset and Specify Train and Eval Partitions

In [None]:
from datasets import load_dataset

# Load the Bitext customer-support chatbot training dataset
dataset = load_dataset("bitext/Bitext-customer-support-llm-chatbot-training-dataset")

# Demo-sized subsets of the "train" split: rows 0-4999 for training and
# rows 5000-5199 for evaluation, each shuffled with a fixed seed.
# NOTE(review): the slices are taken *before* shuffling, so they reflect the
# dataset's stored row order — confirm this is the intended sampling.
train_dataset = dataset["train"].select(range(5000)).shuffle(seed=42)
eval_dataset = dataset["train"].select(range(5000, 5200)).shuffle(seed=42)

### Define Data Processing Function

In [None]:
def sample_formatting_function(row):
    """Turn one dataset row into a chat-style prompt/completion pair.

    Args:
        row: Mapping with an "instruction" entry (the user's query) and a
            "response" entry (the reference answer).

    Returns:
        Dict with two keys:
            "prompt": a list holding the fixed system message followed by the
                user message built from row["instruction"].
            "completion": a one-element list holding the assistant message
                built from row["response"].
    """
    # Fixed system instruction prepended to every example
    system_message = {
        "role": "system",
        "content": "You are a helpful and friendly customer support assistant. Please answer the user's query to the best of your ability.",
    }
    user_message = {"role": "user", "content": row["instruction"]}
    assistant_message = {"role": "assistant", "content": row["response"]}

    return {
        "prompt": [system_message, user_message],
        "completion": [assistant_message],
    }

### Initialize Experiment

In [None]:
# Every experiment instance must be uniquely named
# `experiment` is the handle used later in this notebook to call run_fit() and end()
experiment = Experiment(experiment_name="exp1-chatqa")

### Define Custom Eval Metrics Function

In [None]:
def sample_compute_metrics(eval_preds):
    """Optional eval hook: score predictions against labels with ROUGE-L and BLEU.

    Args:
        eval_preds: A two-item iterable unpacked as (predictions, labels).
            Both are passed straight to the `evaluate` metrics as
            `predictions` and `references`.

    Returns:
        Dict with "rougeL" and "bleu", each rounded to 4 decimal places.
    """
    predictions, labels = eval_preds

    # Standard text-based eval metrics, loaded through the `evaluate` package
    import evaluate
    metrics = {name: evaluate.load(name) for name in ("rouge", "bleu")}

    rouge_l = metrics["rouge"].compute(
        predictions=predictions, references=labels, use_stemmer=True
    )["rougeL"]
    bleu_score = metrics["bleu"].compute(
        predictions=predictions, references=labels
    )["bleu"]

    return {"rougeL": round(rouge_l, 4), "bleu": round(bleu_score, 4)}

### Define Multi-Config Knobs for Model, LoRA, and SFT Trainer using RapidFire AI Wrapper APIs

In [None]:
# 2 LoRA PEFT configs with different adapter capacities
peft_configs = List([
    # Smaller adapter: rank 16 on the query and value projections only
    RFLoraConfig(
        r=16,
        lora_alpha=32,
        lora_dropout=0.05,
        target_modules=["q_proj", "v_proj"],
        bias="none",
    ),
    # Larger adapter: rank 128 on the query, key, value, and output projections
    RFLoraConfig(
        r=128,
        lora_alpha=256,
        lora_dropout=0.05,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        bias="none",
    ),
])

# 2 base models x 2 peft configs = 4 combinations in total.
# Both base models use identical trainer, loading, and generation settings, so one
# RFModelConfig is generated per model name. Every iteration builds its own
# RFSFTConfig and dict objects; only peft_configs and the two callbacks are shared.
config_set = List([
    RFModelConfig(
        model_name=base_model_name,
        peft_config=peft_configs,
        training_args=RFSFTConfig(
            learning_rate=2e-4,
            lr_scheduler_type="linear",
            per_device_train_batch_size=4,
            per_device_eval_batch_size=8,
            num_train_epochs=2,
            gradient_accumulation_steps=4,
            logging_steps=5,
            eval_strategy="steps",
            eval_steps=25,
            fp16=True,
            save_strategy="epoch",
        ),
        model_type="causal_lm",
        model_kwargs={"device_map": "auto", "torch_dtype": "auto", "use_cache": False},
        formatting_func=sample_formatting_function,
        compute_metrics=sample_compute_metrics,
        # Used for text-based evaluation/prediction with causal_lm models
        generation_config={
            "max_new_tokens": 256,
            "temperature": 0.6,
            "top_p": 0.9,
            "top_k": 40,
            "repetition_penalty": 1.18,
        },
    )
    for base_model_name in (
        "meta-llama/Llama-3.1-8B-Instruct",
        "mistralai/Mistral-7B-Instruct-v0.3",
    )
])


#### Define Model Creation Function for All Model Types Across Configs

In [None]:

def sample_create_model(model_config):
    """Function to create model object for any given config; must return tuple of (model, tokenizer)

    Args:
        model_config: Mapping that provides "model_name", "model_type", and
            "model_kwargs". "model_kwargs" is unpacked into the model's
            `from_pretrained` call.

    Returns:
        Tuple of (model, tokenizer), both loaded via `from_pretrained(model_name)`.

    Raises:
        NotImplementedError: If model_type is "custom" and no custom loading
            logic has been filled in. Previously this branch left `model`
            unassigned and failed at the return with an UnboundLocalError,
            after the tokenizer had already been loaded.
    """
    from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForMaskedLM

    model_name = model_config["model_name"]
    model_type = model_config["model_type"]
    model_kwargs = model_config["model_kwargs"]

    if model_type == "causal_lm":
        model = AutoModelForCausalLM.from_pretrained(model_name, **model_kwargs)
    elif model_type == "seq2seq_lm":
        model = AutoModelForSeq2SeqLM.from_pretrained(model_name, **model_kwargs)
    elif model_type == "masked_lm":
        model = AutoModelForMaskedLM.from_pretrained(model_name, **model_kwargs)
    elif model_type == "custom":
        # Handle custom model loading logic, e.g., loading your own checkpoints:
        # replace the raise below with `model = ...`
        raise NotImplementedError(
            "model_type 'custom' requires custom model loading logic in sample_create_model"
        )
    else:
        # Any other model_type defaults to causal LM
        model = AutoModelForCausalLM.from_pretrained(model_name, **model_kwargs)

    tokenizer = AutoTokenizer.from_pretrained(model_name)

    return (model, tokenizer)


#### Generate Config Group

In [None]:
# Simple grid search across all sets of config knob values = 4 combinations in total
# (the 2 base-model configs in config_set, each carrying the 2 LoRA configs in peft_configs)
config_group = RFGridSearch(
    configs=config_set,
    trainer_type="SFT"
)

### Run Multi-Config Training

In [None]:
# Launch training of all configs in the config_group with swap granularity of 4 chunks
# Positional arguments: config group, model-creation function, train dataset, eval dataset.
# seed=42 is the same seed value used when shuffling the datasets earlier in the notebook.
experiment.run_fit(config_group, sample_create_model, train_dataset, eval_dataset, num_chunks=4, seed=42)

### End Current Experiment

In [None]:
# End the experiment that was created with Experiment(...) earlier in this notebook
experiment.end()

<div align="center">
<a href="https://rapidfire.ai/"><img src="https://raw.githubusercontent.com/RapidFireAI/rapidfireai/main/images/RapidFire - Blue bug -white text.svg" width="115"></a>
<a href="https://discord.gg/6vSTtncKNN"><img src="https://raw.githubusercontent.com/RapidFireAI/rapidfireai/main/images/discord-button.svg" width="145"></a>
<a href="https://oss-docs.rapidfire.ai/"><img src="https://raw.githubusercontent.com/RapidFireAI/rapidfireai/main/images/documentation-button.svg" width="125"></a>
<br/>
Thanks for trying RapidFire AI! ⭐ <i>Star us on <a href="https://github.com/RapidFireAI/rapidfireai">GitHub</a></i> ⭐
</div>