# Fine-Tuning Mistral

In [1]:
!python -m pip install git+https://github.com/julianjandeleit/swarm_descriptions.git@b824af4d1e0a97e9029d309687316acecc8000e6

Collecting git+https://github.com/julianjandeleit/swarm_descriptions.git@b824af4d1e0a97e9029d309687316acecc8000e6
  Cloning https://github.com/julianjandeleit/swarm_descriptions.git (to revision b824af4d1e0a97e9029d309687316acecc8000e6) to /tmp/pip-req-build-m7k9z5wi
  Running command git clone --filter=blob:none --quiet https://github.com/julianjandeleit/swarm_descriptions.git /tmp/pip-req-build-m7k9z5wi
  Running command git rev-parse -q --verify 'sha^b824af4d1e0a97e9029d309687316acecc8000e6'
  Running command git fetch -q https://github.com/julianjandeleit/swarm_descriptions.git b824af4d1e0a97e9029d309687316acecc8000e6
  Running command git checkout -q b824af4d1e0a97e9029d309687316acecc8000e6
  Resolved https://github.com/julianjandeleit/swarm_descriptions.git to commit b824af4d1e0a97e9029d309687316acecc8000e6
  Installing build dependencies ... [?25l- \ | / - \ | done
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pypr

In [2]:
!pip install -q -U transformers bitsandbytes peft datasets accelerate trl

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
cudf 23.8.0 requires cupy-cuda11x>=12.0.0, which is not installed.
cuml 23.8.0 requires cupy-cuda11x>=12.0.0, which is not installed.
dask-cudf 23.8.0 requires cupy-cuda11x>=12.0.0, which is not installed.
cudf 23.8.0 requires pandas<1.6.0dev0,>=1.3, but you have pandas 2.0.3 which is incompatible.
cudf 23.8.0 requires protobuf<5,>=4.21, but you have protobuf 3.20.3 which is incompatible.
cuml 23.8.0 requires dask==2023.7.1, but you have dask 2023.12.0 which is incompatible.
cuml 23.8.0 requires distributed==2023.7.1, but you have distributed 2023.12.0 which is incompatible.
dask-cuda 23.8.0 requires dask==2023.7.1, but you have dask 2023.12.0 which is incompatible.
dask-cuda 23.8.0 requires distributed==2023.7.1, but you have distributed 2023.12.0 which is incompatible.
dask-cuda 23.8.0 requires pa

In [3]:
import torch
from transformers import BitsAndBytesConfig
from transformers import AutoTokenizer
from transformers import AutoModelForCausalLM
from peft import LoraConfig, get_peft_model
from peft import get_peft_model
from transformers import TrainingArguments
from peft import prepare_model_for_kbit_training
from trl import SFTTrainer
from copy import deepcopy
import pandas as pd
from sklearn.model_selection import train_test_split
import pyarrow as pa
import pyarrow.dataset as ds
from datasets import Dataset
import random



In [4]:
from swarm_descriptions.mission_elements import get_generators, MissionParams
from swarm_descriptions.configfiles import config_to_string
from swarm_descriptions.utils import truncate_floats

# Loading the Model

In [5]:
def load_checkpoint(path: str):

  # this should make it fit to vram of gpu
  bnb_config = BitsAndBytesConfig(
      load_in_4bit=True,
      bnb_4bit_quant_type="nf4",
      bnb_4bit_use_double_quant=False,
      bnb_4bit_compute_dtype=torch.bfloat16
  )

  model = AutoModelForCausalLM.from_pretrained(
    path,
    load_in_4bit=True,
    quantization_config=bnb_config,
    torch_dtype=torch.bfloat16,
    device_map="auto",
  )

  tokenizer = AutoTokenizer.from_pretrained(
    path
  )

  return model, tokenizer



base_model = "mistralai/Mistral-7B-Instruct-v0.2"
model, tokenizer = load_checkpoint(base_model)

config.json:   0%|          | 0.00/596 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.46k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

# Loading the Dataset

In [6]:
generators = get_generators()
MissionParams.sample(*generators)

MissionParams(arena_params=CircularArena(radius=2.6804963097821566, height=1.0140310290689012, num_walls=6), lights_params=UniformLights(lights=[LightConfig(x=0.30562297257765203, y=0.3282833753831682, intensity=3.5154231771149003)]), robots_params=CenteredSwarm(radius=0.9802510479045772, num_robots=18), objective_params=Distribution(max_connection_range=0.06152077977601561, area_width=3.034114252057761, area_length=2.2151668600477166))

In [7]:
n_rows = 2500
rows = []
for n in range(n_rows):
    mission = MissionParams.sample(*generators)
    conf = config_to_string(mission.configure())
    conf = truncate_floats(conf)
    desc = random.sample(mission.describe(),1)[0]  
    desc = truncate_floats(desc)
    rows.append({"description": desc, "configuration": conf, "parameters": mission})


dataset = pd.DataFrame(rows)
dataset.head()

Unnamed: 0,description,configuration,parameters
0,A circle with 19 walls forms the structure of ...,"<?xml version=""1.00"" ?>\n<config>\n <swarm-el...",MissionParams(arena_params=CircularArena(radiu...
1,The area is a rectangle with dimensions 2.62 x...,"<?xml version=""1.00"" ?>\n<config>\n <swarm-el...",MissionParams(arena_params=RectangularArena(le...
2,The area is a rectangle with dimensions 3.82 x...,"<?xml version=""1.00"" ?>\n<config>\n <swarm-el...",MissionParams(arena_params=RectangularArena(le...
3,"The circular arena, having a radius of 3.28 me...","<?xml version=""1.00"" ?>\n<config>\n <swarm-el...",MissionParams(arena_params=CircularArena(radiu...
4,The environment consists of a rectangular area...,"<?xml version=""1.00"" ?>\n<config>\n <swarm-el...",MissionParams(arena_params=RectangularArena(le...


In [8]:
print(dataset.iloc[10].description)

The area is a rectangle with dimensions 7.01 x 2.20 x 2.09.In the arena, 1 lights are evenly spread out with intensities 3.20. 25 robots are evenly placed around the center, covering a radius of 0.56 meters. The robots should keep a pairwise distance not larger than 0.12 and cover an area of 0.52 x 3.12 m². 


In [9]:
print(dataset.iloc[10].configuration)

<?xml version="1.00" ?>
<config>
  <swarm-elems>
    <entity quantity="25" max_trials="100">
      <e-puck id="epuck">
        <controller config="automode_bt"/>
      </e-puck>
    </entity>
  </swarm-elems>
  <env-elems>
    <light id="light_0" position="0.10,0.23,0.00" orientation="360,0,0" color="yellow" intensity="3.20" medium="leds"/>
    <box id="wall_0" size="0.01,2.20,0.10" movable="false">
      <body position="0,-3.51,0.00" orientation="90.00,0.00,0.00"/>
    </box>
    <box id="wall_1" size="0.01,7.01,0.10" movable="false">
      <body position="1.10,0,0.00" orientation="0.00,0.00,0.00"/>
    </box>
    <box id="wall_2" size="0.01,2.20,0.10" movable="false">
      <body position="0,3.51,0" orientation="90.00,0.00,0.00"/>
    </box>
    <box id="wall_3" size="0.01,7.01,0.10" movable="false">
      <body position="-1.10,0,0" orientation="0,0.00,0.00"/>
    </box>
    <arena-attrib size="2.20,7.01,2.09"/>
  </env-elems>
  <objective-elems>
    <spawnCircle position="0,0,0" rad

In [10]:
dataset.iloc[10].parameters.objective_params.configure().type

'distribution'

In [11]:
argos_template = """<?xml version="1.0" ?>
<argos-configuration>
    <!-- ************* -->
    <!-- * Framework * -->
    <!-- ************* -->
    <framework>
        <experiment length="120" ticks_per_second="10" random_seed="0"/>
    </framework>

    <!-- ****************** -->
    <!-- * Loop functions * -->
    <!-- ****************** -->
    <loop_functions library="/opt/argos/AutoMoDe-loopfunctions/build/loop-functions/custom-loopfunctions/libcustom_loopfunc.so" label="template">
        <loop-placeholder></loop-placeholder>
    </loop_functions>

    <!-- *************** -->
    <!-- * Controllers * -->
    <!-- *************** -->
    <controllers>
    <automode_controller_bt id="automode_bt" library="/opt/argos/AutoMoDe/build/src/libautomode_bt.so">
            <actuators>
                <epuck_wheels implementation="default" noise_std_dev="0.05"/>
                <epuck_rgb_leds implementation="default" medium="leds"/>
                <epuck_range_and_bearing implementation="medium" medium="rab" data_size="4" range="0.7"/>
            </actuators>
            <sensors>
                <epuck_proximity implementation="default" show_rays="false" noise_level="0.05" calibrated="true"/>
                <epuck_range_and_bearing implementation="medium" medium="rab" data_size="4" nois_std_deviation="1.5" loss_probability="0.85" calibrated="true"/>
                <epuck_light implementation="default" show_rays="false" noise_level="0.05" calibrated="true"/>
                <epuck_ground implementation="rot_z_only" noise_level="0.05" calibrated="true"/>
                <epuck_omnidirectional_camera implementation="rot_z_only" medium="leds" show_rays="false"/>
            </sensors>
      <params bt-config="--nroot 3 --nchildroot 1 --n0 0 --nchild0 2 --n00 6 --c00 5 --p00 0.26 --n01 5 --a01 1 --p01 0"/>
    </automode_controller_bt>
    </controllers>

    <!-- ********* -->
    <!-- * Arena * -->
    <!-- ********* -->
    <arena size="10, 10, 1" center="0,0,0">

        <!-- Change the floor here -->
        <floor id="floor" source="loop_functions" pixels_per_meter="300"/>
        <!-- Change the light here -->
        <!-- Please note that this light has an intensity equal to 0 -->
        <!-- and therefore does not affect the experiment. -->
        <!-- ARGoS requires at least one light when using the light sensor -->
        <light id="light" position="0.0,0.0,0.0" orientation="0,0,0" color="red" intensity="5.0" medium="leds"/>

        <environment-placeholder></environment-placeholder>
        <!-- ********** -->
        <!-- * E-Puck * -->
        <!-- ********** -->

        <!-- The epucks are distributed by the AutoMoDeLoopFunctions. -->
        <distribute>
                <position method="uniform" min="-1.0,-1.0,0" max="1.0,1.0,0"/> <!-- dummy -->
                <orientation method="gaussian" mean="0,0,0" std_dev="360,0,0"/> <!-- dummy -->
                <robots-placeholder></robots-placeholder>
        </distribute>
    </arena>

    <!-- ******************* -->
    <!-- * Physics engines * -->
    <!-- ******************* -->
    <physics_engines>
        <dynamics2d id="dyn2d" />
    </physics_engines>

    <!-- ********* -->
    <!-- * Media * -->
    <!-- ********* -->
    <media>
        <led id="leds" grid_size="1,1,1"/>
        <range_and_bearing id="ircom"/>
        <range_and_bearing id="rab"/>
    </media>

    <!-- ***************** -->
    <!-- * Visualization * -->
    <!-- ***************** -->
  <visualization>
        <qt-opengl>
            <camera>
                <placement idx="0"
                           position="0,0,4.5"
                           look_at="0,0,0"
                           lens_focal_length="30" />
            </camera>
        </qt-opengl>
  </visualization>

</argos-configuration>
"""

# Dataset Preprocessing

converts to prompt format

In [12]:
def generate_prompt(sample, tokenizer):
  messages = [
      {"role": "user", "content": sample["description"]+"\nGenerate the xml configuration for this mission."},
      {"role": "assistant", "content": sample["configuration"]},
  ]

  text = tokenizer.apply_chat_template(messages, tokenize=False) # wraps text with special tokens depending on role (assitant or user)
  return text

In [13]:
dataset["text"] = dataset.apply(lambda x: generate_prompt(x, tokenizer),axis=1)

dataset = dataset.filter(["text"])
dataset.head()

Unnamed: 0,text
0,<s>[INST] A circle with 19 walls forms the str...
1,<s>[INST] The area is a rectangle with dimensi...
2,<s>[INST] The area is a rectangle with dimensi...
3,"<s>[INST] The circular arena, having a radius ..."
4,<s>[INST] The environment consists of a rectan...


In [14]:
print(dataset.iloc[0].text)

<s>[INST] A circle with 19 walls forms the structure of the environment. Evenly distributed throughout the environment are 4 lights. Their positions are ((0.58, 0.36), (0.34, -0.13), (-1.06, -0.91), (1.12, -0.66)). 18 robots are evenly placed around the center, covering a radius of 1.25 meters. The robots' goal is to meet at the white circle. In the arena, you'll find two areas: a circle at [-0.06, -0.56] with a radius of 0.31 meters and another circle at [-0.29, 0.01] with a radius of 0.63 meters. 
Generate the xml configuration for this mission. [/INST]<?xml version="1.00" ?>
<config>
  <swarm-elems>
    <entity quantity="18" max_trials="100">
      <e-puck id="epuck">
        <controller config="automode_bt"/>
      </e-puck>
    </entity>
  </swarm-elems>
  <env-elems>
    <light id="light_0" position="0.58,0.36,0.00" orientation="360,0,0" color="yellow" intensity="4.15" medium="leds"/>
    <light id="light_1" position="0.34,-0.13,0.00" orientation="360,0,0" color="yellow" intensit

In [15]:
generated_train_dataset, generated_val_dataset = train_test_split(dataset, test_size=0.2)

def to_dataset(df):
  dataset = ds.dataset(pa.Table.from_pandas(df).to_batches())

  ### convert to Huggingface dataset
  hg_dataset = Dataset(pa.Table.from_pandas(df))
  return hg_dataset

generated_train_dataset = to_dataset(generated_train_dataset)
generated_val_dataset = to_dataset(generated_val_dataset)

In [16]:
generated_train_dataset

Dataset({
    features: ['text', '__index_level_0__'],
    num_rows: 2000
})

# LoRA Configuration

In [17]:
def print_trainable_parameters(model):
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )

In [18]:
def convert_to_lora_pft_mut(model):
    model.gradient_checkpointing_enable()
    model = prepare_model_for_kbit_training(model)
    lora_config = LoraConfig(
        r=8,
        lora_alpha=16,
        target_modules=[
            "q_proj",
            "k_proj",
            "v_proj",
            "o_proj",
            "gate_proj",
            "up_proj",
            "down_proj",
            "lm_head",
        ],
        bias="none",
        lora_dropout=0.05,
        task_type="CAUSAL_LM",
    )

    model = get_peft_model(model, lora_config)
    return lora_config

lora_config = convert_to_lora_pft_mut(model)
print_trainable_parameters(model)

trainable params: 21260288 || all params: 3773331456 || trainable%: 0.5634354746703705


# Model Training

In [19]:
def define_trainer(model, tokenizer, lora_config, train_dataset, eval_dataset):
  training_arguments = TrainingArguments(
      output_dir="./results",
      num_train_epochs=2,
      per_device_train_batch_size=4,
      gradient_accumulation_steps=1,
      optim="paged_adamw_32bit",
      save_strategy="steps",
      save_steps=25,
      logging_steps=25,
      learning_rate=2e-4,
      weight_decay=0.001,
      max_steps=400,
      evaluation_strategy="steps",
      eval_steps=25,
      do_eval=True,
      report_to="none",
  )



  # this appareantly performs reinforcement learning
  # https://pypi.org/project/trl/

  # Setting sft parameters
  trainer = SFTTrainer(
      model=model,
      tokenizer=tokenizer,
      args=training_arguments,
      train_dataset=train_dataset,
      eval_dataset=eval_dataset,
      peft_config=lora_config,
      dataset_text_field="text", # SFTTrainer assumes instruction and response in the same string.
  )

  # necessary for training
  model.config.use_cache = False
  tokenizer.pad_token = tokenizer.eos_token

  return trainer

trainer = define_trainer(model, tokenizer, lora_config, generated_train_dataset, generated_val_dataset)



Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]



In [20]:
trainer.train()

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss,Validation Loss
25,0.4228,0.221644
50,0.1924,0.174804
75,0.1632,0.161552
100,0.1517,0.153756
125,0.1472,0.148764
150,0.1444,0.144602
175,0.1435,0.142639
200,0.1422,0.140508
225,0.136,0.138366
250,0.1339,0.137547




TrainOutput(global_step=400, training_loss=0.16050442695617675, metrics={'train_runtime': 26539.0499, 'train_samples_per_second': 0.06, 'train_steps_per_second': 0.015, 'total_flos': 6.933270726392218e+16, 'train_loss': 0.16050442695617675, 'epoch': 0.8})