# Exp018: Conditional instruction fine-tuning
This experiment instruction-fine-tunes the model on dialog responses from the dataset that already exhibit a given grammar skill, so that the model learns to satisfy a single grammar constraint at a time.

In [1]:
from datasets import load_dataset
from dotenv import load_dotenv
load_dotenv()
import os

from tqdm.notebook import tqdm
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments

import pickle
import numpy as np
import json
from peft import LoraConfig, PeftModel
from trl import SFTTrainer
import random
import sys
sys.path.append(f'../source')
import helpers



In [2]:
# params
# Pickle file read in the loading cell below (hit indices, hit sentences and dialog extracts).
out_file = '../data/corpus_classification.pkl'
# JSONL file the formatted examples are written to and that load_dataset() reads back.
preprossed_dataset_file = '../data/SFT_data.jsonl'
# Hugging Face model id used for both the model and the tokenizer.
model_name = "mistralai/Mistral-7B-Instruct-v0.1"
# Skill numbers to build examples for; used as keys into all_hit_indices and matched
# against the '#' column of the egp table.
nrs = [616] #[58, 616]

In [3]:
# Grammar-skill table from the project helper module. formatting_func reads its '#',
# 'SubCategory', 'guideword' and 'Can-do statement' columns
# (presumably a pandas DataFrame — TODO confirm in helpers.get_egp).
egp = helpers.get_egp()

In [4]:
# The classification pickle holds three objects stored back-to-back; they have to be
# read in the same order they were dumped.
# NOTE: pickle can execute arbitrary code on load — only open files you produced yourself.
with open(out_file, 'rb') as pickle_file:
    all_hit_indices, all_hit_sentences, extracts = [pickle.load(pickle_file) for _ in range(3)]

# One example per (skill number, hit index): element 0 of an extract is the dialog
# context, element 1 is the response that continues it.
hit_pairs = ((nr, extracts[idx]) for nr in nrs for idx in all_hit_indices[nr])
data = [{"context": extract[0], "response": extract[1], "nr": nr} for nr, extract in hit_pairs]

In [5]:
def formatting_func(example):
    """Render one example as an ``[INST] ... [/INST]`` prompt followed by the answer turn.

    Args:
        example: Mapping with the keys
            ``nr``       - one skill number, or a list/tuple/set of them, matched
                           against the ``#`` column of the global ``egp`` table;
            ``context``  - sequence of utterance strings, labelled alternately
                           ``A`` / ``B`` starting with ``A``;
            ``response`` - the utterance that continues the dialog.

    Returns:
        The complete training text: instruction, one bullet per matching grammar
        skill, the labelled dialog, ``[/INST]``, and the response terminated by ``</s>``.
    """
    skill_nrs = example['nr']
    if not isinstance(skill_nrs, (list, tuple, set)):
        skill_nrs = [skill_nrs]
    rules = egp[egp['#'].isin(list(skill_nrs))]

    # Always join with "\n": os.linesep would put "\r\n" into the training text on
    # Windows and make the dataset depend on the machine that generated it.
    constraints = "\n".join("- " + rules['SubCategory'] + " - " + rules['guideword'] + ": " + rules['Can-do statement'])
    context = "\n".join("AB"[i % 2] + ": " + utt for i, utt in enumerate(example["context"]))

    # The answering speaker continues the A/B alternation of the context; a fixed "A"
    # would repeat the previous speaker whenever the context has an odd number of turns.
    next_speaker = "AB"[len(example["context"]) % 2]

    # The single space after "[/INST]" is part of the established prompt format.
    return (
        "[INST] Continue the dialog with one turn and show all of these grammar skills in your response.\n"
        "Grammar skills:\n"
        f"{constraints}\n"
        "Dialog:\n"
        f"{context} [/INST] \n"
        f"{next_speaker}: {example['response']}</s>"
    )
    
# Attach the rendered prompt to every example and persist the examples as JSON Lines
# (one JSON object per line) for load_dataset('json', ...) to read back.
with open(preprossed_dataset_file, 'w') as jsonl_out:
    for record in tqdm(data):
        # Stored on the dict itself, so the entries of `data` carry a 'text' field afterwards.
        record['text'] = formatting_func(record)
        jsonl_out.write(f"{json.dumps(record)}\n")

  0%|          | 0/741 [00:00<?, ?it/s]

In [6]:
# Read the JSONL written above back as a Hugging Face dataset (cache location comes
# from the CACHE_DIR environment variable, if set).
dataset = load_dataset('json', data_files=preprossed_dataset_file, split='train', cache_dir=os.getenv('CACHE_DIR'))
# 80/20 train/test split. The seed is fixed so that the same examples land in each
# split on every run; without it the held-out set changes with each kernel restart.
train_test_split = dataset.train_test_split(test_size=0.2, seed=42)
train_dataset = train_test_split['train']
test_dataset = train_test_split['test']

Generating train split: 0 examples [00:00, ? examples/s]

In [77]:
# Show one training example: the raw fields plus the rendered 'text' prompt.
train_dataset[3]

{'context': ['What is the usual thing here?',
  'Forty to eighty RMB Yuan a plate.',
  "Let's say sixty Yuan then.",
  "OK. Is there anything special you'd like to have on the menu?"],
 'response': "We'd like to have typical Chinese food.",
 'nr': 616,
 'text': "[INST] Continue the dialog with one turn and show all of these grammar skills in your response.\nGrammar skills:\n- would - FORM: AFFIRMATIVE WITH 'LIKE': Can use the affirmative form with 'like'. \nDialog:\nA: What is the usual thing here?\nB: Forty to eighty RMB Yuan a plate.\nA: Let's say sixty Yuan then.\nB: OK. Is there anything special you'd like to have on the menu? [/INST] \nA: We'd like to have typical Chinese food.</s>"}

In [45]:
# Load the base model with 4-bit NF4 weights (float16 compute, no double quantization).
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False,
    bnb_4bit_quant_type="nf4"
)
model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config, cache_dir=os.getenv('CACHE_DIR'), device_map="auto")
model.config.use_cache = False
model.config.pretraining_tp = 1

# NOTE(review): trust_remote_code=True allows code shipped with the model repo to run
# locally; confirm it is actually required for this tokenizer.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, cache_dir=os.getenv('CACHE_DIR'))
tokenizer.padding_side = "right"

# Register a dedicated '[PAD]' token. add_special_tokens both adds it to the vocabulary
# and sets tokenizer.pad_token, so no separate pad_token assignment is needed.
tokenizer.add_special_tokens({'pad_token': '[PAD]'})
# Grow the embedding matrix to cover the added token; the resulting module is the cell output.
model.resize_token_embeddings(len(tokenizer))

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Embedding(32001, 4096)

In [44]:
# Sanity check: let the model continue the dialog of one random training example and
# compare its output with the reference response printed first.
# To probe several constraints at once, set example['nr'] to a list (e.g. [58, 616])
# and regenerate example['text'] with formatting_func before building the prompt.
example = random.choice(train_dataset)
print(example['response'])

# Keep everything up to and including the closing instruction tag; the model has to
# produce the answer turn itself.
EOP = "[/INST]"
eval_prompt = example['text'][:example['text'].index(EOP)+len(EOP)]

# Put the inputs on the device the model was actually loaded on (device_map="auto").
model_input = tokenizer(eval_prompt, return_tensors="pt").to(model.device)
model.eval()
with torch.no_grad():
    output_ids = model.generate(
        **model_input,
        max_new_tokens=1024,
        # Pad with the end-of-sequence token, looked up from the tokenizer instead of
        # hard-coding its id.
        pad_token_id=tokenizer.eos_token_id,
        repetition_penalty=1.5,
    )
# Name kept for compatibility with the rest of the notebook; it holds the decoded dialog text.
generated_code = tokenizer.decode(output_ids[0], skip_special_tokens=True)
print(generated_code)

Dad, what do you want to have? I'd like a hamburger.
[INST] Continue the dialog with one turn and show all of these grammar skills in your response.
Grammar skills:
- would - FORM: AFFIRMATIVE WITH 'LIKE': Can use the affirmative form with 'like'. 
Dialog:
A: I'd like to go to McDonald's this time.
B: OK then. You'll drive, will you?
A: No, I'm a bit tired today. You do that, please.
B: OK, OK. I always do things like that ... Ah, here we are. [/INST] 
A: Do you want anything from there or not ?


In [46]:
# Modules that receive LoRA adapters: the attention projections, the MLP projections
# and the output head.
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
    "lm_head",
]

# Rank-64 adapters with alpha 16 and 10% dropout; no bias parameters are trained.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=64,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    target_modules=lora_target_modules,
)

In [54]:
# Hyper-parameters for the fine-tuning run; metrics are logged to Weights & Biases
# every 10 steps under the run name "gctg".
training_arguments = TrainingArguments(
    output_dir="../models/mistral_FT",
    # Run length is defined by max_steps alone: a positive max_steps overrides
    # num_train_epochs, so the former `num_train_epochs=2` had no effect and was dropped.
    max_steps=250,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    optim="paged_adamw_32bit",
    #save_steps=25,
    logging_steps=10,
    learning_rate=1e-5,
    weight_decay=0.001,
    fp16=False,
    bf16=False,
    max_grad_norm=0.3,
    group_by_length=True,
    # The plain "constant" schedule has no warmup phase, so the former
    # `warmup_ratio=0.03` was ignored and was dropped. Use "constant_with_warmup"
    # together with warmup_ratio if a warmup is actually wanted.
    lr_scheduler_type="constant",
    report_to="wandb",
    run_name="gctg",
    # Evaluation is currently disabled; enable these together with eval_dataset in SFTTrainer.
    #load_best_model_at_end=True,
    #evaluation_strategy="steps",
    #eval_steps=25,
    #per_device_eval_batch_size=32,
    #metric_for_best_model="eval_loss",
    #greater_is_better=False,
)

In [55]:
# Supervised fine-tuning on the rendered 'text' field of each example. Examples are
# fed one per sequence (no packing) with a maximum sequence length of 256; the LoRA
# adapters from peft_config are attached by the trainer.
trainer = SFTTrainer(
    # what to train
    model=model,
    tokenizer=tokenizer,
    peft_config=peft_config,
    # how to train
    args=training_arguments,
    # what to train on
    train_dataset=train_dataset,
    #eval_dataset=test_dataset,
    dataset_text_field="text",
    max_seq_length=256,
    packing=False,
    #neftune_noise_alpha=5,
)

Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


In [None]:
# Run the fine-tuning; the training loss is reported every 10 steps (logging_steps=10).
trainer.train()

Step,Training Loss
10,3.203
20,2.985
30,2.5239
40,2.1482
50,2.1179
60,1.789
70,1.4681
