# Fine-Tuning Mistral

In [1]:
!pip install -q -U transformers bitsandbytes peft datasets accelerate trl

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.3/8.3 MB[0m [31m13.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.6/92.6 MB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m168.3/168.3 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m521.2/521.2 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m265.7/265.7 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m133.9/133.9 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━

In [85]:
import torch
from transformers import BitsAndBytesConfig
from transformers import AutoTokenizer
from transformers import AutoModelForCausalLM
from peft import LoraConfig, get_peft_model
from peft import get_peft_model
from transformers import TrainingArguments
from peft import prepare_model_for_kbit_training
from trl import SFTTrainer
from copy import deepcopy
import pandas as pd
from sklearn.model_selection import train_test_split
import pyarrow as pa
import pyarrow.dataset as ds
from datasets import Dataset

In [3]:
!pip install git+https://github.com/julianjandeleit/swarm_descriptions.git@v0.0.1

Collecting git+https://github.com/julianjandeleit/swarm_descriptions.git@v0.0.1
  Cloning https://github.com/julianjandeleit/swarm_descriptions.git (to revision v0.0.1) to /tmp/pip-req-build-wryuezkn
  Running command git clone --filter=blob:none --quiet https://github.com/julianjandeleit/swarm_descriptions.git /tmp/pip-req-build-wryuezkn
  Running command git checkout -q 98ed37c8188696cbb5e664a6e9f94cd2304a2d9a
  Resolved https://github.com/julianjandeleit/swarm_descriptions.git to commit 98ed37c8188696cbb5e664a6e9f94cd2304a2d9a
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone


In [18]:
from swarm_descriptions.utils import sample_describer_missions
from swarm_descriptions.configfiles import Configurator, ET, config_to_string
from swarm_descriptions import missions
from swarm_descriptions import descriptions

# Loading the Model

In [6]:
def load_checkpoint(path: str):

  # this should make it fit to vram of gpu
  bnb_config = BitsAndBytesConfig(
      load_in_4bit=True,
      bnb_4bit_quant_type="nf4",
      bnb_4bit_use_double_quant=False,
      bnb_4bit_compute_dtype=torch.bfloat16
  )

  model = AutoModelForCausalLM.from_pretrained(
    path,
    load_in_4bit=True,
    quantization_config=bnb_config,
    torch_dtype=torch.bfloat16,
    device_map="auto",
  )

  tokenizer = AutoTokenizer.from_pretrained(
    path
  )

  return model, tokenizer



base_model = "mistralai/Mistral-7B-Instruct-v0.2"
model, tokenizer = load_checkpoint(base_model)

config.json:   0%|          | 0.00/596 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.46k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

# Loading the Dataset

In [68]:
  dm_modules = [
        (missions.aggregation, descriptions.aggregation),
        (missions.flocking, descriptions.flocking),
        (missions.foraging, descriptions.foraging),
        (missions.distribution, descriptions.distribution),
        (missions.connection, descriptions.connection)
    ]

In [69]:
n_rows = 1000
rows = []
for n in range(n_rows):
  sample_describer_missions(dm_modules)

  describer, get_mission, params, modules = sample_describer_missions(dm_modules)
  rows.append({"describer": describer, "get_mission": get_mission, "params_type": params, "params": params, "mission_type": modules[0], "description_type": modules[1]})


dataset = pd.DataFrame(rows)
dataset.head()

Unnamed: 0,describer,get_mission,params_type,params,mission_type,description_type
0,<function describer_1 at 0x7fc6b0da2830>,<function get_mission at 0x7fc6b0ee3760>,"ConnectionParams(conn_start='light_1', conn_en...","ConnectionParams(conn_start='light_1', conn_en...",<module 'swarm_descriptions.missions.connectio...,<module 'swarm_descriptions.descriptions.conne...
1,<function describer_1 at 0x7fc6b0da2200>,<function get_mission at 0x7fc6b0e236d0>,DistributionParams(distr_area=(3.7768616938548...,DistributionParams(distr_area=(3.7768616938548...,<module 'swarm_descriptions.missions.distribut...,<module 'swarm_descriptions.descriptions.distr...
2,<function describer_1 at 0x7fc6b0da15a0>,<function get_mission at 0x7fc6b0ee3400>,FlockingParams(flocking_density=3.114481720941...,FlockingParams(flocking_density=3.114481720941...,<module 'swarm_descriptions.missions.flocking'...,<module 'swarm_descriptions.descriptions.flock...
3,<function describer_3 at 0x7fc6b0da2320>,<function get_mission at 0x7fc6b0e236d0>,DistributionParams(distr_area=(3.2127722957648...,DistributionParams(distr_area=(3.2127722957648...,<module 'swarm_descriptions.missions.distribut...,<module 'swarm_descriptions.descriptions.distr...
4,<function describer_3 at 0x7fc6b0da1000>,<function get_mission at 0x7fc6b0e23d90>,AggregationParams(agg_radius=1.329128005402312...,AggregationParams(agg_radius=1.329128005402312...,<module 'swarm_descriptions.missions.aggregati...,<module 'swarm_descriptions.descriptions.aggre...


In [70]:
argos_template = """<?xml version="1.0" ?>
<argos-configuration>
    <!-- ************* -->
    <!-- * Framework * -->
    <!-- ************* -->
    <framework>
        <experiment length="120" ticks_per_second="10" random_seed="0"/>
    </framework>

    <!-- ****************** -->
    <!-- * Loop functions * -->
    <!-- ****************** -->
    <loop_functions library="/opt/argos/AutoMoDe-loopfunctions/build/loop-functions/custom/libcustom_loopfunc.so" label="template">
        <loop-placeholder></loop-placeholder>
    </loop_functions>

    <!-- *************** -->
    <!-- * Controllers * -->
    <!-- *************** -->
    <controllers>
    <automode_controller_bt id="automode_bt" library="/opt/argos/AutoMoDe/build/src/libautomode_bt.so">
            <actuators>
                <epuck_wheels implementation="default" noise_std_dev="0.05"/>
                <epuck_rgb_leds implementation="default" medium="leds"/>
                <epuck_range_and_bearing implementation="medium" medium="rab" data_size="4" range="0.7"/>
            </actuators>
            <sensors>
                <epuck_proximity implementation="default" show_rays="false" noise_level="0.05" calibrated="true"/>
                <epuck_range_and_bearing implementation="medium" medium="rab" data_size="4" nois_std_deviation="1.5" loss_probability="0.85" calibrated="true"/>
                <epuck_light implementation="default" show_rays="false" noise_level="0.05" calibrated="true"/>
                <epuck_ground implementation="rot_z_only" noise_level="0.05" calibrated="true"/>
                <epuck_omnidirectional_camera implementation="rot_z_only" medium="leds" show_rays="false"/>
            </sensors>
      <params bt-config="--nroot 3 --nchildroot 1 --n0 0 --nchild0 2 --n00 6 --c00 5 --p00 0.26 --n01 5 --a01 1 --p01 0"/>
    </automode_controller_bt>
    </controllers>

    <!-- ********* -->
    <!-- * Arena * -->
    <!-- ********* -->
    <arena size="10, 10, 1" center="0,0,0">

        <!-- Change the floor here -->
        <floor id="floor" source="loop_functions" pixels_per_meter="300"/>
        <!-- Change the light here -->
        <!-- Please note that this light has an intensity equal to 0 -->
        <!-- and therefore does not affect the experiment. -->
        <!-- ARGoS requires at least one light when using the light sensor -->
        <light id="light" position="0,-1.35,0.45" orientation="0,0,0" color="yellow" intensity="5.0" medium="leds"/>

        <environment-placeholder></environment-placeholder>
        <!-- ********** -->
        <!-- * E-Puck * -->
        <!-- ********** -->

        <!-- The epucks are distributed by the AutoMoDeLoopFunctions. -->
        <distribute>
                <robots-placeholder></robots-placeholder>
        </distribute>
    </arena>

    <!-- ******************* -->
    <!-- * Physics engines * -->
    <!-- ******************* -->
    <physics_engines>
        <dynamics2d id="dyn2d" />
    </physics_engines>

    <!-- ********* -->
    <!-- * Media * -->
    <!-- ********* -->
    <media>
        <led id="leds" grid_size="1,1,1"/>
        <range_and_bearing id="ircom"/>
        <range_and_bearing id="rab"/>
    </media>

    <!-- ***************** -->
    <!-- * Visualization * -->
    <!-- ***************** -->
  <visualization>
        <qt-opengl>
            <camera>
                <placement idx="0"
                           position="0,0,4.5"
                           look_at="0,0,0"
                           lens_focal_length="30" />
            </camera>
        </qt-opengl>
  </visualization>

</argos-configuration>

"""

# Dataset Preprocessing

converts to prompt format

In [71]:
dataset["instruction"] = dataset[["describer", "params"]].apply(lambda row: row[0](row[1]), axis=1)

In [72]:
def generate_config(row, argos_template=argos_template):
  get_mission, params = row
  skeleton = ET.fromstring(argos_template.strip())
  config = Configurator().generate_config(skeleton, get_mission(params))
  return config_to_string(config)

dataset["response"] = dataset[["get_mission", "params"]].apply(generate_config, axis=1)

In [73]:
dataset.head()

Unnamed: 0,describer,get_mission,params_type,params,mission_type,description_type,instruction,response
0,<function describer_1 at 0x7fc6b0da2830>,<function get_mission at 0x7fc6b0ee3760>,"ConnectionParams(conn_start='light_1', conn_en...","ConnectionParams(conn_start='light_1', conn_en...",<module 'swarm_descriptions.missions.connectio...,<module 'swarm_descriptions.descriptions.conne...,The mission of the swarm is connection. It tak...,"<?xml version=""1.00"" ?>\n<argos-configuration>..."
1,<function describer_1 at 0x7fc6b0da2200>,<function get_mission at 0x7fc6b0e236d0>,DistributionParams(distr_area=(3.7768616938548...,DistributionParams(distr_area=(3.7768616938548...,<module 'swarm_descriptions.missions.distribut...,<module 'swarm_descriptions.descriptions.distr...,The mission of the swarm is distribution. It t...,"<?xml version=""1.00"" ?>\n<argos-configuration>..."
2,<function describer_1 at 0x7fc6b0da15a0>,<function get_mission at 0x7fc6b0ee3400>,FlockingParams(flocking_density=3.114481720941...,FlockingParams(flocking_density=3.114481720941...,<module 'swarm_descriptions.missions.flocking'...,<module 'swarm_descriptions.descriptions.flock...,The mission of the swarm is flocking. It takes...,"<?xml version=""1.00"" ?>\n<argos-configuration>..."
3,<function describer_3 at 0x7fc6b0da2320>,<function get_mission at 0x7fc6b0e236d0>,DistributionParams(distr_area=(3.2127722957648...,DistributionParams(distr_area=(3.2127722957648...,<module 'swarm_descriptions.missions.distribut...,<module 'swarm_descriptions.descriptions.distr...,"A cohesive swarm, comprised of 11 robots, unde...","<?xml version=""1.00"" ?>\n<argos-configuration>..."
4,<function describer_3 at 0x7fc6b0da1000>,<function get_mission at 0x7fc6b0e23d90>,AggregationParams(agg_radius=1.329128005402312...,AggregationParams(agg_radius=1.329128005402312...,<module 'swarm_descriptions.missions.aggregati...,<module 'swarm_descriptions.descriptions.aggre...,"A cohesive swarm, comprised of 11 robots, unde...","<?xml version=""1.00"" ?>\n<argos-configuration>..."


In [74]:
def generate_prompt(sample, tokenizer):
  messages = [
      {"role": "user", "content": (sample["context"]+" " if sample.get("context") else "") + sample["instruction"]+"\nGenerate the xml configuration for this mission."},
      {"role": "assistant", "content": sample["response"]},
  ]

  text = tokenizer.apply_chat_template(messages, tokenize=False) # wraps text with special tokens depending on role (assitant or user)
  return text

In [75]:
dataset["text"] = dataset[["instruction","response"]].apply(lambda x: generate_prompt(x, tokenizer),axis=1)

dataset = dataset.filter(["text"])
dataset.head()

Unnamed: 0,text
0,<s>[INST] The mission of the swarm is connecti...
1,<s>[INST] The mission of the swarm is distribu...
2,<s>[INST] The mission of the swarm is flocking...
3,"<s>[INST] A cohesive swarm, comprised of 11 ro..."
4,"<s>[INST] A cohesive swarm, comprised of 11 ro..."


In [76]:
print(dataset.iloc[0].text)

<s>[INST] The mission of the swarm is connection. It takes place inside an environment of size (15.35, 15.35, 2.19). Inside of the environment are walls in a circular shape. The walls form a circle with radius 7.68 made out of 9 walls.  Additionally, there are two lights inside of the environment. The first light is placed at (-0.04, -1.71, 0.00). The second light is placed at (-3.49, -4.92, 0.00). The swarm consists of 15 robots. The objective of this mission is to connect light_1 with light_2 by forming a line of robots between those. Robots should not be too close in order to be a proper line. Robots are to close if their distance is below 0.11. 
Generate the xml configuration for this mission:
 [/INST]<?xml version="1.00" ?>
<argos-configuration>
  <framework>
    <experiment length="120" ticks_per_second="10" random_seed="0"/>
  </framework>
  <loop_functions library="/opt/argos/AutoMoDe-loopfunctions/build/loop-functions/custom/libcustom_loopfunc.so" label="template">
    <params

In [90]:
generated_train_dataset, generated_val_dataset = train_test_split(dataset, test_size=0.2)

def to_dataset(df):
  dataset = ds.dataset(pa.Table.from_pandas(df).to_batches())

  ### convert to Huggingface dataset
  hg_dataset = Dataset(pa.Table.from_pandas(df))
  return hg_dataset

generated_train_dataset = to_dataset(generated_train_dataset)
generated_val_dataset = to_dataset(generated_val_dataset)

In [91]:
generated_train_dataset

Dataset({
    features: ['text', '__index_level_0__'],
    num_rows: 800
})

# LoRA Configuration

In [92]:
def print_trainable_parameters(model):
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )

In [80]:
def convert_to_lora_pft_mut(model):
    model.gradient_checkpointing_enable()
    model = prepare_model_for_kbit_training(model)
    lora_config = LoraConfig(
        r=8,
        lora_alpha=16,
        target_modules=[
            "q_proj",
            "k_proj",
            "v_proj",
            "o_proj",
            "gate_proj",
            "up_proj",
            "down_proj",
            "lm_head",
        ],
        bias="none",
        lora_dropout=0.05,
        task_type="CAUSAL_LM",
    )

    model = get_peft_model(model, lora_config)
    return lora_config

lora_config = convert_to_lora_pft_mut(model)
print_trainable_parameters(model)

trainable params: 21260288 || all params: 3773331456 || trainable%: 0.5634354746703705


# Model Training

In [93]:
def define_trainer(model, tokenizer, lora_config, train_dataset, eval_dataset):
  training_arguments = TrainingArguments(
      output_dir="./results",
      num_train_epochs=1,
      per_device_train_batch_size=4,
      gradient_accumulation_steps=1,
      optim="paged_adamw_32bit",
      save_strategy="steps",
      save_steps=25,
      logging_steps=25,
      learning_rate=2e-4,
      weight_decay=0.001,
      max_steps=250,
      evaluation_strategy="steps",
      eval_steps=25,
      do_eval=True,
      report_to="none",
  )



  # this appareantly performs reinforcement learning
  # https://pypi.org/project/trl/

  # Setting sft parameters
  trainer = SFTTrainer(
      model=model,
      tokenizer=tokenizer,
      args=training_arguments,
      train_dataset=train_dataset,
      eval_dataset=eval_dataset,
      peft_config=lora_config,
      dataset_text_field="text", # SFTTrainer assumes instruction and response in the same string.
  )

  # necessary for training
  model.config.use_cache = False
  tokenizer.pad_token = tokenizer.eos_token

  return trainer

trainer = define_trainer(model, tokenizer, lora_config, generated_train_dataset, generated_val_dataset)



Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]



In [None]:
trainer.train()

Step,Training Loss,Validation Loss
