# Fine-Tuning Mistral

In [None]:
!python -m pip install git+https://github.com/julianjandeleit/swarm_descriptions.git@b824af4d1e0a97e9029d309687316acecc8000e6

In [None]:
!pip install -q -U transformers bitsandbytes peft datasets accelerate trl

In [None]:
import torch
from transformers import BitsAndBytesConfig
from transformers import AutoTokenizer
from transformers import AutoModelForCausalLM
from peft import LoraConfig, get_peft_model
from peft import get_peft_model
from transformers import TrainingArguments
from peft import prepare_model_for_kbit_training
from trl import SFTTrainer
from copy import deepcopy
import pandas as pd
from sklearn.model_selection import train_test_split
import pyarrow as pa
import pyarrow.dataset as ds
from datasets import Dataset
import random

In [None]:
from swarm_descriptions.mission_elements import get_generators, MissionParams
from swarm_descriptions.configfiles import config_to_string
from swarm_descriptions.utils import truncate_floats

# Loading the Model

In [None]:
def load_checkpoint(path: str):
  """Load a causal language model and its tokenizer with 4-bit quantization.

  Args:
    path: Model identifier or directory handed to `from_pretrained`.

  Returns:
    A `(model, tokenizer)` tuple.
  """
  # 4-bit NF4 quantization; intended to make the model fit into GPU VRAM.
  bnb_config = BitsAndBytesConfig(
      load_in_4bit=True,
      bnb_4bit_quant_type="nf4",
      bnb_4bit_use_double_quant=False,
      bnb_4bit_compute_dtype=torch.bfloat16,
  )

  # Quantization is requested only through `quantization_config`. Passing
  # `load_in_4bit=True` as an extra keyword here duplicates the setting and is
  # rejected by recent transformers releases when a config is also given.
  model = AutoModelForCausalLM.from_pretrained(
      path,
      quantization_config=bnb_config,
      torch_dtype=torch.bfloat16,
      device_map="auto",
  )

  tokenizer = AutoTokenizer.from_pretrained(path)

  return model, tokenizer



# Identifier of the base checkpoint that is fine-tuned in this notebook.
base_model = "mistralai/Mistral-7B-Instruct-v0.2"
# Load the model and the matching tokenizer once; both are reused by later cells.
model, tokenizer = load_checkpoint(base_model)

# Loading the Dataset

In [None]:
# Generators from which random missions are sampled below.
generators = get_generators()
# Sample a single mission; as the last expression of the cell it is displayed.
MissionParams.sample(*generators)

In [None]:
n_rows = 10000


def _build_row(mission):
    """Turn one sampled mission into a row of the raw dataset.

    The row holds one randomly chosen description of the mission, its
    configuration rendered as a string, and the mission object itself.
    Both texts are passed through `truncate_floats`.
    """
    configuration = truncate_floats(config_to_string(mission.configure()))
    # A mission offers several descriptions; pick exactly one of them.
    description = truncate_floats(random.sample(mission.describe(), 1)[0])
    return {
        "description": description,
        "configuration": configuration,
        "parameters": mission,
    }


# One freshly sampled mission per row.
rows = [_build_row(MissionParams.sample(*generators)) for _ in range(n_rows)]

dataset = pd.DataFrame(rows)
dataset.head()

In [None]:
# Show the description text of the sample at position 10.
print(dataset.iloc[10].description)

In [None]:
# Show the configuration string that belongs to the same sample.
print(dataset.iloc[10].configuration)

In [None]:
# Inspect the `type` of the configured objective of the same sample.
dataset.iloc[10].parameters.objective_params.configure().type

In [None]:
# Skeleton of an ARGoS experiment configuration (XML). The empty
# <loop-placeholder>, <environment-placeholder> and <robots-placeholder>
# elements presumably mark where mission-specific content is substituted —
# TODO confirm against the code that consumes this template.
argos_template = """<?xml version="1.0" ?>
<argos-configuration>
    <!-- ************* -->
    <!-- * Framework * -->
    <!-- ************* -->
    <framework>
        <experiment length="120" ticks_per_second="10" random_seed="0"/>
    </framework>

    <!-- ****************** -->
    <!-- * Loop functions * -->
    <!-- ****************** -->
    <loop_functions library="/opt/argos/AutoMoDe-loopfunctions/build/loop-functions/custom-loopfunctions/libcustom_loopfunc.so" label="template">
        <loop-placeholder></loop-placeholder>
    </loop_functions>

    <!-- *************** -->
    <!-- * Controllers * -->
    <!-- *************** -->
    <controllers>
    <automode_controller_bt id="automode_bt" library="/opt/argos/AutoMoDe/build/src/libautomode_bt.so">
            <actuators>
                <epuck_wheels implementation="default" noise_std_dev="0.05"/>
                <epuck_rgb_leds implementation="default" medium="leds"/>
                <epuck_range_and_bearing implementation="medium" medium="rab" data_size="4" range="0.7"/>
            </actuators>
            <sensors>
                <epuck_proximity implementation="default" show_rays="false" noise_level="0.05" calibrated="true"/>
                <epuck_range_and_bearing implementation="medium" medium="rab" data_size="4" nois_std_deviation="1.5" loss_probability="0.85" calibrated="true"/>
                <epuck_light implementation="default" show_rays="false" noise_level="0.05" calibrated="true"/>
                <epuck_ground implementation="rot_z_only" noise_level="0.05" calibrated="true"/>
                <epuck_omnidirectional_camera implementation="rot_z_only" medium="leds" show_rays="false"/>
            </sensors>
      <params bt-config="--nroot 3 --nchildroot 1 --n0 0 --nchild0 2 --n00 6 --c00 5 --p00 0.26 --n01 5 --a01 1 --p01 0"/>
    </automode_controller_bt>
    </controllers>

    <!-- ********* -->
    <!-- * Arena * -->
    <!-- ********* -->
    <arena size="10, 10, 1" center="0,0,0">

        <!-- Change the floor here -->
        <floor id="floor" source="loop_functions" pixels_per_meter="300"/>
        <!-- Change the light here -->
        <!-- Please note that this light has an intensity equal to 0 -->
        <!-- and therefore does not affect the experiment. -->
        <!-- ARGoS requires at least one light when using the light sensor -->
        <light id="light" position="0.0,0.0,0.0" orientation="0,0,0" color="red" intensity="5.0" medium="leds"/>

        <environment-placeholder></environment-placeholder>
        <!-- ********** -->
        <!-- * E-Puck * -->
        <!-- ********** -->

        <!-- The epucks are distributed by the AutoMoDeLoopFunctions. -->
        <distribute>
                <position method="uniform" min="-1.0,-1.0,0" max="1.0,1.0,0"/> <!-- dummy -->
                <orientation method="gaussian" mean="0,0,0" std_dev="360,0,0"/> <!-- dummy -->
                <robots-placeholder></robots-placeholder>
        </distribute>
    </arena>

    <!-- ******************* -->
    <!-- * Physics engines * -->
    <!-- ******************* -->
    <physics_engines>
        <dynamics2d id="dyn2d" />
    </physics_engines>

    <!-- ********* -->
    <!-- * Media * -->
    <!-- ********* -->
    <media>
        <led id="leds" grid_size="1,1,1"/>
        <range_and_bearing id="ircom"/>
        <range_and_bearing id="rab"/>
    </media>

    <!-- ***************** -->
    <!-- * Visualization * -->
    <!-- ***************** -->
  <visualization>
        <qt-opengl>
            <camera>
                <placement idx="0"
                           position="0,0,4.5"
                           look_at="0,0,0"
                           lens_focal_length="30" />
            </camera>
        </qt-opengl>
  </visualization>

</argos-configuration>
"""

# Dataset Preprocessing

Converts each sample into the chat prompt format used for fine-tuning.

In [None]:
def generate_prompt(sample, tokenizer):
  """Render one dataset row as a single chat-formatted string.

  The description followed by a fixed instruction forms the user turn, and
  the configuration forms the assistant turn.

  Args:
    sample: Row providing the keys "description" and "configuration".
    tokenizer: Object exposing `apply_chat_template`.

  Returns:
    The result of `tokenizer.apply_chat_template(..., tokenize=False)`.
  """
  instruction = "\nGenerate the xml configuration for this mission."
  user_turn = {"role": "user", "content": sample["description"] + instruction}
  assistant_turn = {"role": "assistant", "content": sample["configuration"]}

  # The chat template wraps each turn depending on its role (user or
  # assistant); tokenize=False asks for the result without tokenizing it.
  return tokenizer.apply_chat_template([user_turn, assistant_turn], tokenize=False)

In [None]:
# Render every row into its chat-formatted training string; the tokenizer is
# forwarded to generate_prompt as a keyword argument by DataFrame.apply.
dataset["text"] = dataset.apply(generate_prompt, axis=1, tokenizer=tokenizer)

# Keep only the rendered text column.
dataset = dataset.filter(items=["text"])
dataset.head()

In [None]:
# Show the fully rendered training string of the first sample.
print(dataset.iloc[0].text)

In [None]:
# Hold out 20% of the rows for validation. No `random_state` is passed, so
# the split is presumably different on every run — fix a seed if
# reproducibility matters.
generated_train_dataset, generated_val_dataset = train_test_split(dataset, test_size=0.2)

def to_dataset(df):
  """Convert a pandas DataFrame into a Hugging Face `Dataset`.

  Args:
    df: DataFrame to convert.

  Returns:
    A `Dataset` holding the columns of `df`.
  """
  # preserve_index=False keeps the pandas index (shuffled after the train/val
  # split) from being stored as an extra column next to the data columns.
  return Dataset.from_pandas(df, preserve_index=False)

# Replace both DataFrame splits with their Hugging Face dataset counterparts.
generated_train_dataset = to_dataset(generated_train_dataset)
generated_val_dataset = to_dataset(generated_val_dataset)

In [None]:
# Display the converted training dataset as the cell output.
generated_train_dataset

# LoRA Configuration

In [None]:
def print_trainable_parameters(model):
    """Print how many of the model's parameters are trainable.

    Counts the elements of every parameter yielded by
    `model.named_parameters()` and, separately, of those whose
    `requires_grad` flag is set, then prints both totals and the trainable
    share in percent.
    """
    sizes = [(p.numel(), p.requires_grad) for _, p in model.named_parameters()]
    all_param = sum(size for size, _ in sizes)
    trainable_params = sum(size for size, needs_grad in sizes if needs_grad)

    percentage = 100 * trainable_params / all_param
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {percentage}"
    )

In [None]:
def convert_to_lora_pft_mut(model):
    """Prepare `model` for k-bit training, apply LoRA and return the LoRA config.

    NOTE(review): the object returned by `get_peft_model` is not handed back;
    only the `LoraConfig` is. Callers therefore appear to rely on `model`
    being modified in place (hence the `_mut` suffix) — confirm against the
    PEFT documentation.

    Args:
        model: Model to convert.

    Returns:
        The `LoraConfig` that was passed to `get_peft_model`.
    """
    # Names of the modules targeted by LoRA.
    lora_targets = [
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
        "lm_head",
    ]

    model.gradient_checkpointing_enable()
    prepared = prepare_model_for_kbit_training(model)

    lora_config = LoraConfig(
        task_type="CAUSAL_LM",
        r=8,
        lora_alpha=16,
        lora_dropout=0.05,
        bias="none",
        target_modules=lora_targets,
    )

    # The wrapped model is deliberately not kept; see the note in the docstring.
    get_peft_model(prepared, lora_config)
    return lora_config

# Apply LoRA to the loaded model and keep the config for the trainer.
lora_config = convert_to_lora_pft_mut(model)
# Report how many parameters of `model` are trainable after the conversion.
print_trainable_parameters(model)

# Model Training

In [None]:
def define_trainer(model, tokenizer, lora_config, train_dataset, eval_dataset):
  """Build the `SFTTrainer` that fine-tunes `model` on the text datasets.

  Side effects: sets `model.config.use_cache` to False and sets
  `tokenizer.pad_token` to `tokenizer.eos_token`.

  Args:
    model: Model to train.
    tokenizer: Tokenizer used by the trainer.
    lora_config: PEFT configuration forwarded to the trainer as `peft_config`.
    train_dataset: Training dataset; the trainer reads its "text" field.
    eval_dataset: Evaluation dataset; the trainer reads its "text" field.

  Returns:
    The configured `SFTTrainer`.
  """
  # Necessary for training. Done *before* the trainer is constructed so that
  # everything the trainer sets up from the model and tokenizer already sees
  # these settings.
  model.config.use_cache = False
  tokenizer.pad_token = tokenizer.eos_token

  training_arguments = TrainingArguments(
      output_dir="./results",
      # NOTE: according to the TrainingArguments documentation a positive
      # `max_steps` overrides `num_train_epochs`, so training stops after
      # 500 steps regardless of this value.
      num_train_epochs=5,
      per_device_train_batch_size=4,
      gradient_accumulation_steps=1,
      optim="paged_adamw_32bit",
      save_strategy="steps",
      save_steps=25,
      logging_steps=25,
      learning_rate=2e-4,
      weight_decay=0.001,
      max_steps=500,
      evaluation_strategy="steps",
      eval_steps=25,
      do_eval=True,
      report_to="none",
  )

  # SFTTrainer (from TRL) performs supervised fine-tuning, not reinforcement
  # learning: https://pypi.org/project/trl/
  #
  # NOTE(review): if the caller has already applied LoRA to `model`, passing
  # `peft_config` here may apply it a second time — confirm against the TRL
  # and PEFT documentation.
  trainer = SFTTrainer(
      model=model,
      tokenizer=tokenizer,
      args=training_arguments,
      train_dataset=train_dataset,
      eval_dataset=eval_dataset,
      peft_config=lora_config,
      dataset_text_field="text", # SFTTrainer assumes instruction and response in the same string.
  )

  return trainer

# Build the trainer from the model, tokenizer, LoRA config and both dataset splits.
trainer = define_trainer(model, tokenizer, lora_config, generated_train_dataset, generated_val_dataset)

In [None]:
# Start the fine-tuning run.
trainer.train()