In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Read the Hugging Face access token from the environment; os.getenv returns
# None when HF_TOKEN is not defined.
secret_value_0 = os.getenv('HF_TOKEN')

from huggingface_hub import login
# Authenticate this session against the Hugging Face Hub with that token.
# NOTE(review): if HF_TOKEN is unset, token=None is passed here — confirm that
# the resulting fallback behaviour of login() is acceptable for unattended runs.
login(token = secret_value_0)

  from .autonotebook import tqdm as notebook_tqdm
Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [3]:
# 0. imports
import torch
from transformers import AutoTokenizer, BitsAndBytesConfig, AutoModelForCausalLM

from trl import AutoModelForCausalLMWithValueHead, PPOConfig, PPOTrainer
from peft import LoraConfig, get_peft_model

# LoRA adapter settings shared by every get_peft_model(...) call in this
# notebook: rank 3, alpha 32, dropout 0.05, bias mode "none", causal-LM task.
lora_config = LoraConfig(
    r=3, # smaller lora dimension? original 16
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)


# 1. load a pretrained model
model_name = "mistralai/Mathstral-7B-v0.1"
#model_name = "gpt2"

# 4-bit NF4 quantization without double quantization; matmuls run in float16.
compute_dtype = torch.float16
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=False,
    bnb_4bit_compute_dtype=compute_dtype,
)

# Tokenizer: reuse the end-of-sequence token as the padding token.
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token
#tokenizer.pad_token = tokenizer.unk_token

# Load the quantized base model and wrap it with the LoRA adapter config.
model = get_peft_model(
    AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config),
    lora_config,
)


`low_cpu_mem_usage` was None, now default to True since model is quantized.
Loading checkpoint shards: 100%|██████████| 6/6 [00:27<00:00,  4.57s/it]


## dataset loading

In [4]:
import pandas as pd
from swarm_descriptions.mission_elements import get_generators, MissionParams
from swarm_descriptions.configfiles import config_to_string
from swarm_descriptions.utils import truncate_floats
import random
import pyarrow as pa
import pyarrow.dataset as ds
import pickle
import numpy as np
import random
# Seed both Python's and NumPy's global random number generators.
random.seed(42)
np.random.seed(42)


# SECURITY NOTE: pickle.load can execute arbitrary code contained in the file.
# Only load this pickle if it comes from a trusted source.
with open("../ressources/automode_descriptions_evaluated.pickle","rb") as file:
    dataset = pickle.load(file)

# Label each row with the class name of its parameters' objective_params object.
dataset["type"] = dataset["parameters"].map(lambda x: type(x.objective_params).__name__)
# Keep the index as a regular column before rows are dropped below.
dataset["original_index"] = dataset.index
dataset = dataset.dropna()

# Per-row standardization of the scores: (x - mean) / std.
# NOTE(review): a row whose scores are all equal has std == 0 — confirm this cannot occur.
dataset["z-scores"] = dataset["scores"].map(lambda x: (x - np.mean(x)) / np.std(x))
# Per-row coefficient of variation (std / mean), computed after shifting the
# scores by abs(min) + 1, which makes every shifted value >= 1.
dataset["coeff_of_var"] = dataset["scores"].map(lambda x: x+abs(np.min(x)) + 1).map(lambda x: np.std(x)/np.mean(x))



dataset.head()

Unnamed: 0,description,configuration,parameters,argos,behavior_tree,scores,avg_score,type,original_index,z-scores,coeff_of_var
0,The environment consists of a circular arena w...,"<?xml version=""1.00"" ?>\n<config>\n <swarm-el...",MissionParams(arena_params=CircularArena(radiu...,"<?xml version=""1.00"" ?>\n<argos-configuration>...",--nroot 3 --nchildroot 3 --n0 0 --nchild0 2 --...,"[-9.5415, -7.86722, -2.30625, -2.53487, -2.216...",-4.89684,Connection,0,"[-1.6687805956231354, -1.067228280568879, 0.93...",0.493079
1,The environment is a rectangular area with len...,"<?xml version=""1.00"" ?>\n<config>\n <swarm-el...",MissionParams(arena_params=RectangularArena(le...,"<?xml version=""1.00"" ?>\n<argos-configuration>...",--nroot 3 --nchildroot 4 --n0 0 --nchild0 2 --...,"[0.1636, 1.0646, 0.2188, 0.1203, 0.1807, 0.252...",0.26047,Foraging,1,"[-0.35621753561939246, 2.9570063685106027, -0....",0.203021
2,"With a radius of 2.08 meters, the circular are...","<?xml version=""1.00"" ?>\n<config>\n <swarm-el...",MissionParams(arena_params=CircularArena(radiu...,"<?xml version=""1.00"" ?>\n<argos-configuration>...",--nroot 3 --nchildroot 3 --n0 0 --nchild0 2 --...,"[-11383.4, -12173.4, -7285.27, -11960.6, -1308...",-11208.091,Distribution,2,"[-0.0626823752619009, -0.3451497697305358, 1.4...",0.471759
3,"A rectangular area, with a length of 2.43 mete...","<?xml version=""1.00"" ?>\n<config>\n <swarm-el...",MissionParams(arena_params=RectangularArena(le...,"<?xml version=""1.00"" ?>\n<argos-configuration>...",--nroot 3 --nchildroot 4 --n0 0 --nchild0 2 --...,"[609.375, 545.25, 150.0, 362.625, 601.875, 426...",490.3875,Aggregation,3,"[0.6527075242914112, 0.3009489782660998, -1.86...",0.284225
5,The environment is a circle made out of 20 wal...,"<?xml version=""1.00"" ?>\n<config>\n <swarm-el...",MissionParams(arena_params=CircularArena(radiu...,"<?xml version=""1.00"" ?>\n<argos-configuration>...",--nroot 3 --nchildroot 4 --n0 0 --nchild0 2 --...,"[0.5695, 1.6495, 1.7125, 0.3495, 0.366, 0.7707...",0.78491,Foraging,5,"[-0.4647886528828779, 1.8655198059329066, 2.00...",0.217136


In [None]:
# Show the description and the behavior-tree string of the row at position 10.
print(dataset.iloc[10].description)
print(dataset.iloc[10].behavior_tree)

In this setting, a rectangle is formed with dimensions 5.12 x 6.39 x 1.09.The arena features 2 lights: (1.73, 0.70, 3.57), (1.54, 0.26, 4.57). Around the central point, 7 robots are positioned uniformly within a 1.29-meter radius. In the environment, you'll find a circle at [-0.45, -2.35] with a radius of 0.39 meters, characterized by its white hue. There's also another circle at [2.77, 2.42] with a radius of 0.27 meters in black. The robots are assigned the goal of moving items from the black starting zone to the white circle. 
--nroot 3 --nchildroot 4 --n0 0 --nchild0 2 --n00 6 --c00 2 --p00 0.1132 --n01 5 --a01 3 --p01 0 --n1 0 --nchild1 2 --n10 6 --c10 5 --p10 0.2426 --n11 5 --a11 3 --p11 0 --n2 0 --nchild2 2 --n20 6 --c20 1 --p20 0.9688 --n21 5 --a21 2 --p21 0 --n3 0 --nchild3 2 --n30 6 --c30 0 --p30 0.4093 --n31 5 --a31 1 --p31 0


In [6]:
import re
# cf. NumeroLogic: https://arxiv.org/html/2404.00459v1

def encode_number(text):
    """Wrap every number in ``text`` in length-annotated markers.

    A run of digits, optionally followed by ``.`` and more digits, is rewritten
    as ``<sn>{int_len}<mn>{number}<en>``, or as
    ``<sn>{int_len}.{frac_len}<mn>{number}<en>`` when a fractional part is
    present. A leading minus sign is not part of the match and therefore stays
    outside the markers. Digits embedded in words are encoded as well.

    Args:
        text: Input string.

    Returns:
        The string with every matched number wrapped in markers.
    """
    number_re = r'(\d+)(\.(\d+))?'

    def _tag(m):
        whole = m.group(1)  # integer part
        frac = m.group(3)   # fractional digits, or None when absent
        lengths = f'{len(whole)}.{len(frac)}' if frac else f'{len(whole)}'
        return f'<sn>{lengths}<mn>{m.group(0)}<en>'

    return re.sub(number_re, _tag, text)

def decode_number(text):
    """Strip the number markers from ``text``, leaving the plain numbers.

    Removes every ``<sn>…<mn>`` prefix whose payload consists only of digits
    and dots, and every ``<en>`` suffix.

    Args:
        text: String possibly containing number markers.

    Returns:
        The string with the markers removed.
    """
    without_prefix = re.sub(r'<sn>[\d\.]+<mn>', '', text)
    return without_prefix.replace('<en>', '')


#special_tokens_dict = {'additional_special_tokens': ['<sn>', '<mn>', '<en>']}
#tokenizer.add_special_tokens(special_tokens_dict)

#tokenizer.save_pretrained('path/to/save/tokenizer')
# define features and labels: the number-encoded description is the model
# input, the number-encoded behavior tree is the expected output
dataset["llm_input"] = dataset["description"].map(encode_number)
dataset["llm_output"] = dataset["behavior_tree"].map(encode_number)

In [7]:
def generate_prompt(sample, tokenizer, max_length=None):
    """Tokenize one sample as a user/assistant chat exchange.

    The user turn is ``sample["llm_input"]`` followed by a fixed instruction
    line; the assistant turn is ``str(sample["llm_output"])``.

    Args:
        sample: Mapping (e.g. a DataFrame row) providing ``"llm_input"`` (str)
            and ``"llm_output"``.
        tokenizer: Object exposing ``apply_chat_template``; it wraps the
            messages with role-dependent special tokens (assistant or user).
        max_length: Optional maximum number of tokens. Truncation is requested
            only when this is given. Requesting truncation without a length
            merely triggers a "no maximum length is provided" warning on
            tokenizers that define no maximum length, and truncates nothing.

    Returns:
        The ``input_ids`` of the templated conversation without its first id.
    """
    instruction = "\nGenerate the behavior tree that achieves the objective of this mission."
    messages = [
        {"role": "user", "content": sample["llm_input"] + instruction},
        {"role": "assistant", "content": str(sample["llm_output"])},
    ]

    encoded = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        truncation=max_length is not None,
        max_length=max_length,
        return_dict=True,
    )
    # Drop the first id (presumably the BOS token the template prepends —
    # verify for other tokenizers) so it is not duplicated when the decoded
    # text is tokenized again later.
    return encoded["input_ids"][1:]

# Tokenize every row with the chat template, then decode the ids back to text
# so both the token ids and the templated string are available.
dataset["tokens"] = dataset.apply(lambda x: generate_prompt(x, tokenizer),axis=1)
dataset["text"] = dataset.apply(lambda x: tokenizer.decode(x.tokens), axis=1)

# Keep only the columns used from here on.
# NOTE: this rebinds `dataset` without the "llm_input"/"llm_output" columns, so
# re-running this cell without re-running the previous ones fails in generate_prompt.
dataset = dataset.filter(["tokens","text","original_index"])
dataset.head()

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Unnamed: 0,tokens,text,original_index
0,"[3, 1183, 5234, 13103, 1070, 1032, 21207, 2482...",[INST] The environment consists of a circular ...,0
1,"[3, 1183, 5234, 1117, 1032, 10485, 9453, 3466,...",[INST] The environment is a rectangular area w...,1
2,"[3, 3094, 1032, 14398, 1070, 1291, 7481, 29535...",[INST] With a radius of <sn>1.2<mn>2.08<en> me...,2
3,"[3, 1098, 10485, 9453, 3466, 29493, 1163, 1032...","[INST] A rectangular area, with a length of <s...",3
5,"[3, 1183, 5234, 1117, 1032, 10429, 2037, 1343,...",[INST] The environment is a circle made out of...,5


In [8]:
# Display the first templated sample with the number markers stripped again.
decode_number(dataset.iloc[0].text)

'[INST] The environment consists of a circular arena with radius 1.89, made out of 24 walls. The environment is 2.47 high. There are the following lights in the arena: ((-1.11, -0.21), (-0.75, 0.01), (-0.81, 0.40), (-0.75, 0.24)). 10 robots are evenly distributed around the origin within a radius of 0.67 m. The objective for the robots is to connect both circles from white to black, maintaining a distance just under 0.15 m. There are two circles on the floor—one at [0.71, -1.06] with a radius of 0.41 meters, colored in white, and another at [-0.38, -0.42] with a radius of 0.36 meters in black. \nGenerate the behavior tree that achieves the objective of this mission.[/INST] --nroot 3 --nchildroot 3 --n0 0 --nchild0 2 --n00 6 --c00 0 --p00 0.4028 --n01 5 --a01 4 --att01 2.6855 --p01 0 --n1 0 --nchild1 2 --n10 6 --c10 1 --p10 0.1656 --n11 5 --a11 1 --p11 0 --n2 0 --nchild2 2 --n20 6 --c20 1 --p20 0.5189 --n21 5 --a21 3 --p21 0</s>'

In [9]:
# Print the same sample with the number markers still in place.
print(dataset.iloc[0].text)

[INST] The environment consists of a circular arena with radius <sn>1.2<mn>1.89<en>, made out of <sn>2<mn>24<en> walls. The environment is <sn>1.2<mn>2.47<en> high. There are the following lights in the arena: ((-<sn>1.2<mn>1.11<en>, -<sn>1.2<mn>0.21<en>), (-<sn>1.2<mn>0.75<en>, <sn>1.2<mn>0.01<en>), (-<sn>1.2<mn>0.81<en>, <sn>1.2<mn>0.40<en>), (-<sn>1.2<mn>0.75<en>, <sn>1.2<mn>0.24<en>)). <sn>2<mn>10<en> robots are evenly distributed around the origin within a radius of <sn>1.2<mn>0.67<en> m. The objective for the robots is to connect both circles from white to black, maintaining a distance just under <sn>1.2<mn>0.15<en> m. There are two circles on the floor—one at [<sn>1.2<mn>0.71<en>, -<sn>1.2<mn>1.06<en>] with a radius of <sn>1.2<mn>0.41<en> meters, colored in white, and another at [-<sn>1.2<mn>0.38<en>, -<sn>1.2<mn>0.42<en>] with a radius of <sn>1.2<mn>0.36<en> meters in black. 
Generate the behavior tree that achieves the objective of this mission.[/INST] --nroot <sn>1<mn>3<en> -

Convert the dataset to a Hugging Face dataset

In [10]:
from sklearn.model_selection import train_test_split
from datasets import Dataset
# Pass the seed explicitly: without random_state the split draws from NumPy's
# global RNG, so its result depends on whatever random draws happened in
# previously executed cells and changes when cells are re-run.
generated_train_dataset, generated_val_dataset = train_test_split(dataset, test_size=0.2, random_state=42)

def to_dataset(df, preserve_index=None):
    """Convert a pandas DataFrame into a Hugging Face ``Dataset``.

    Args:
        df: The DataFrame to convert.
        preserve_index: Forwarded to ``pyarrow.Table.from_pandas``. The default
            ``None`` keeps pyarrow's default handling, under which a
            non-trivial index ends up as an ``__index_level_0__`` column.
            Pass ``False`` to leave the index out of the dataset.

    Returns:
        A ``Dataset`` backed by the Arrow table built from ``df``.
    """
    table = pa.Table.from_pandas(df, preserve_index=preserve_index)

    ### convert to Huggingface dataset
    return Dataset(table)

# def to_ppo_dataset(df):
#     hg_dataset = Dataset(pa.Table.from_pandas(df))

#generated_train_dataset = to_dataset(generated_train_dataset.head(250))
#generated_val_dataset = to_dataset(generated_val_dataset.head(50))
# Cap the splits at 2500 / 500 rows and convert them to Hugging Face datasets.
# NOTE: both names are rebound from DataFrame to Dataset here, so this cell
# cannot be re-run without first re-running the split above it.
generated_train_dataset = to_dataset(generated_train_dataset.head(2500))
generated_val_dataset = to_dataset(generated_val_dataset.head(500))
generated_train_dataset

Dataset({
    features: ['tokens', 'text', 'original_index', '__index_level_0__'],
    num_rows: 232
})

## sft finetuning
Supervised fine-tuning is necessary so that the data is in-distribution for PPO training, according to the TRL documentation.

In [12]:
from trl import SFTTrainer, SFTConfig
from transformers import TrainingArguments


# Configure the padding side BEFORE the trainer is constructed, so that
# everything the trainer derives from the tokenizer at construction time
# already sees the intended value.
tokenizer.padding_side = 'right'

# Set up the training arguments
sft_arguments = SFTConfig(
    dataset_text_field="text",  # column holding the templated training text
    max_seq_length=512,
    output_dir="logs",
    num_train_epochs=1,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,  # 4
    optim="paged_adamw_32bit",
    save_steps=0,
    logging_steps=25,
    learning_rate=2e-4,
    weight_decay=0.001,
    # fp16=True,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,  # -1: the number of steps is derived from num_train_epochs
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="cosine",
    report_to="none",
    eval_strategy="epoch"
)

# Initialize the SFTTrainer with the SFTConfig
sft_trainer = SFTTrainer(
    model=model,
    train_dataset=generated_train_dataset,
    eval_dataset=generated_val_dataset,
    tokenizer=tokenizer,
    args=sft_arguments,
    packing=False,
)

Map: 100%|██████████| 232/232 [00:00<00:00, 3468.34 examples/s]
Map: 100%|██████████| 58/58 [00:00<00:00, 2969.24 examples/s]


In [25]:
# Evaluate on the validation split; the training call in the next cell is commented out.
sft_trainer.evaluate()

Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'eval_loss': 1.4189746379852295,
 'eval_model_preparation_time': 0.0028,
 'eval_runtime': 6.531,
 'eval_samples_per_second': 8.881,
 'eval_steps_per_second': 1.225}

In [None]:
# sft_trainer.train()

In [14]:
# import shutil
# try:
#     shutil.rmtree("sft_trained")
# except:
#     print("sft_trained dir not present, will be created")
# model.save_pretrained("sft_trained", from_pt=True)

In [31]:
#_meval = AutoModelForCausalLM.from_pretrained("sft_trained")

Loading checkpoint shards: 100%|██████████| 6/6 [00:01<00:00,  5.69it/s]


In [15]:
# Reload the checkpoint stored in the local "sft_trained" directory with the
# same 4-bit quantization config used for the base model.
_meval = AutoModelForCausalLM.from_pretrained("sft_trained", quantization_config=bnb_config)
#_meval = AutoModelForCausalLM.from_pretrained("sft_trained")
# NOTE(review): this wraps the reloaded model in a new adapter built from
# lora_config — confirm that a fresh adapter on top of the saved checkpoint is
# intended for evaluation, rather than only loading the saved weights.
_meval = get_peft_model(_meval, lora_config)

`low_cpu_mem_usage` was None, now default to True since model is quantized.
Loading checkpoint shards: 100%|██████████| 6/6 [00:33<00:00,  5.56s/it]


In [32]:
# Build the inference prompt: everything up to and including the first
# "[/INST]" marker of the first training sample, i.e. without the answer.
splitstr = "[/INST]"
inference_str = generated_train_dataset[0]["text"].partition(splitstr)[0] + splitstr
inference_str

"[INST] A circle with <sn>1<mn>4<en> walls forms the structure of the environment. There are <sn>1<mn>0<en> lights distributed evenly in the arena. Placed within a <sn>1.2<mn>1.01<en>-meter radius around the center are <sn>2<mn>15<en> robots. The robots' goal is to meet at the black circle. In the arena, you'll find two areas: a circle at [<sn>1.2<mn>0.14<en>, -<sn>1.2<mn>1.37<en>] with a radius of <sn>1.2<mn>0.38<en> meters and another circle at [-<sn>1.2<mn>0.14<en>, <sn>1.2<mn>1.43<en>] with a radius of <sn>1.2<mn>0.30<en> meters. \nGenerate the behavior tree that achieves the objective of this mission.[/INST]"

In [33]:

#inputs = tokenizer(formatted_prompt, return_tensors="pt", padding=True)
#inputs = tokenizer.encode(generated_val_dataset[0]["text"], return_tensors="pt", padding=True)

#attention_mask = inputs["attention_mask"]
# Tokenize the single prompt as PyTorch tensors; padding and truncation are
# not requested.
inputs = tokenizer(
    inference_str,
    return_tensors="pt",
    return_attention_mask=True,
)

# Pull out the two tensors that generation needs
input_ids = inputs['input_ids']
attention_mask = inputs['attention_mask']

# Greedy decoding (do_sample=False) of up to 1500 new tokens, stopping at EOS;
# EOS doubles as the padding id.
text_tokens = _meval.generate(
    input_ids.to(_meval.device),
    attention_mask=attention_mask.to(_meval.device),
    min_length=1,
    max_new_tokens=1500,
    do_sample=False,
    top_p=1.0,
    pad_token_id=tokenizer.eos_token_id,
    eos_token_id=tokenizer.eos_token_id,
)

# Decode the first (only) sequence and print it without the number markers
text = tokenizer.decode(text_tokens[0])
print(decode_number(text))

KeyboardInterrupt: 

In [30]:
# Display the raw decoded generation, number markers included.
text

"<s>[INST] A circle with <sn>1<mn>4<en> walls forms the structure of the environment. There are <sn>1<mn>0<en> lights distributed evenly in the arena. Placed within a <sn>1.2<mn>1.01<en>-meter radius around the center are <sn>2<mn>15<en> robots. The robots' goal is to meet at the black circle. In the arena, you'll find two areas: a circle at [<sn>1.2<mn>0.14<en>, -<sn>1.2<mn>1.37<en>] with a radius of <sn>1.2<mn>0.38<en> meters and another circle at [-<sn>1.2<mn>0.14<en>, <sn>1.2<mn>1.43<en>] with a radius of <sn>1.2<mn>0.30<en> meters. \nGenerate the behavior tree that achieves the objective of this mission.[/INST] Sure, here's a basic behavior tree that could be used to achieve the objective of this mission:\n\n1. **Start**: This is the initial state of the robot. It starts by checking if it is at the center of the arena.\n\n2. **Check if at center**: If the robot is at the center, it moves to the next state. If not, it navigates towards the center.\n\n3. **Navigate to center**: The 

In [18]:
# Shape of the generated id tensor returned by generate().
text_tokens.shape

torch.Size([1, 1319])

In [19]:
# Only the last expression of a cell is displayed, so the encode() result on
# the first line is computed but not shown; the cell shows the decoded EOS token.
tokenizer.encode("</s>")
tokenizer.decode(tokenizer.eos_token_id)

'</s>'

In [20]:
# Check how "</s>" is tokenized when it appears inside ordinary text.
encoded_eos = tokenizer.encode("asdf  a </s> a")
print(encoded_eos)  # Token ids of the whole string, including the id(s) produced for "</s>"
print(tokenizer.decode(encoded_eos))  # Round trip: decode those ids back to text

[1, 1061, 4821, 29473, 1032, 29473, 2, 1032]
<s>asdf  a </s> a


In [21]:
# Compare the ids obtained by re-encoding the decoded EOS token with eos_token_id itself.
tokenizer.encode(tokenizer.decode(tokenizer.eos_token_id)), tokenizer.eos_token_id

([1, 2], 2)

In [22]:
# import torch
# import gc

# # Delete the model
# del model

# # Free the GPU memory
# torch.cuda.empty_cache()

# # Perform garbage collection
# gc.collect()

In [23]:
# import torch
# from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# from torch.utils.data import Dataset, DataLoader
# import random

# # Define a custom dataset class
# class TokenDataset(Dataset):
#     def __init__(self, tokenized_data, max_length):
#         self.tokenized_data = tokenized_data
#         self.max_length = max_length

#     def __getitem__(self, idx):
#         input_ids = self.tokenized_data[idx]
#         # Pad the input IDs to the maximum length
#         input_ids = input_ids + [0] * (self.max_length - len(input_ids))
#         return {'input_ids': torch.tensor(input_ids)}

#     def __len__(self):
#         return len(self.tokenized_data)

# # Create a sample dataset
# # Assuming generated_val_dataset is a list of dictionaries with a 'tokens' key
# max_length = max(len(row["tokens"]) for row in generated_val_dataset)

# # Pad each sequence with 0 and append EOS token
# padded_tokens = []
# for row in generated_val_dataset:
#     padded_token = row["tokens"] + [0] * (max_length - len(row["tokens"])) + [tokenizer.eos_token_id]
#     padded_tokens.append(padded_token)
# tokenized_data = padded_tokens

# # Create a dataset instance
# dataset = TokenDataset(tokenized_data, max_length=10)

# # Create a data loader
# data_loader = DataLoader(dataset, batch_size=2, shuffle=True)

# # Set the device (GPU or CPU)
# #device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# #model.to(device)

# # Define the training loop
# def train(model, device, data_loader, epochs):
#     optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)
#     for epoch in range(epochs):
#         model.train()
#         total_loss = 0
#         for batch in data_loader:
#             input_ids = batch['input_ids'].to(device)
#             # Create a random mask for the input IDs
#             mask = torch.zeros_like(input_ids)
#             for i in range(input_ids.shape[0]):
#                 mask[i, random.randint(0, input_ids.shape[1] - 1)] = 1
#             # Zero the gradients
#             optimizer.zero_grad()
#             # Forward pass
#             outputs = model(input_ids=input_ids, labels=input_ids, attention_mask=mask)
#             loss = outputs.loss
#             # Backward pass
#             loss.backward()
#             # Update the model parameters
#             optimizer.step()
#             # Accumulate the loss
#             total_loss += loss.item()
#         print(f'Epoch {epoch+1}, Loss: {total_loss / len(data_loader)}')

# # Train the model
# train(_meval, _meval.device, data_loader, epochs=5)