# Exp018: Conditional instruction fine-tuning
This experiment instruction-fine-tunes the model on examples of existing grammar skills found in the dataset, so that it learns to follow a single constraint at a time.

In [1]:
from datasets import load_dataset
from dotenv import load_dotenv
load_dotenv()
import os

from tqdm.notebook import tqdm
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments


import pickle
from torch.utils.data import RandomSampler
import numpy as np
import json
from peft import LoraConfig, PeftModel
from trl import SFTTrainer, DataCollatorForCompletionOnlyLM
import random
import sys
sys.path.append(f'../source')
import helpers
import models
import importlib
#importlib.reload(models)



<module 'models' from '/cluster/home/dglandorf/grammarctg/experiments/../source/models.py'>

In [2]:
# params
# Pickle file that is read (not written) when the dataset is prepared below.
out_file = '../data/corpus_classification.pkl'
# JSONL file the formatted examples are written to and later loaded from.
preprossed_dataset_file = '../data/SFT_data.jsonl'
# Identifier of the base model and tokenizer passed to from_pretrained().
model_name = "mistralai/Mistral-7B-Instruct-v0.2"
# Rule numbers whose hits are turned into training examples.
nrs = [616] #[58, 616]#
# NOTE(review): 616 is hard-coded here; presumably it has to match the entry
# in `nrs` - confirm before changing either one.
classifier = models.load_classifier(616, "corpus_training")

In [3]:
# Grammar rule table; formatting_func filters it on the '#' column and reads
# the 'SubCategory', 'guideword' and 'Can-do statement' columns.
egp = helpers.get_egp()

## Prepare dataset

In [None]:
# The file holds three objects pickled back to back; they are read in order.
# NOTE: pickle can execute arbitrary code while loading - only open files
# produced by this project.
with open(out_file, 'rb') as f:
    all_hit_indices, all_hit_sentences, extracts = (pickle.load(f) for _ in range(3))

# One record per (rule number, hit index): element 0 of the extract is the
# dialog context, element 1 is the response.
data = [
    {"context": extracts[idx][0], "response": extracts[idx][1], "nr": nr}
    for nr in nrs
    for idx in all_hit_indices[nr]
]

In [None]:
def formatting_func(example, use_constraints=False):
    """Render one example as an ``[INST] ... [/INST]`` prompt plus its answer.

    Args:
        example: Mapping with the keys ``"context"`` (sequence of utterances;
            speakers are labelled alternately ``A``, ``B``, ``A``, ...
            starting with ``A``), ``"response"`` (the target answer) and
            ``"nr"`` (a single rule number or a list of rule numbers).
        use_constraints: If False (default), the fixed "would like"
            instruction is used - this is what the function has always
            returned. If True, the generic instruction is used instead and
            the grammar skills for ``example["nr"]`` are looked up in the
            module-level ``egp`` table. That template used to sit behind the
            first ``return`` and was unreachable.

    Returns:
        The prompt followed by ``"A: <response></s>"`` as a single string.
    """
    # Always join with "\n": the templates use "\n" as well, and os.linesep
    # would make the training text depend on the platform.
    context = "\n".join(
        ("A" if i % 2 == 0 else "B") + ": " + utt
        for i, utt in enumerate(example["context"])
    )
    # The single space after "[/INST]" is intentional and must stay: the
    # evaluation code cuts the prompt at index("[/INST]") + len("[/INST]") + 4,
    # i.e. right after " \nA:".
    dialog_and_answer = f"{context} [/INST] \nA: {example['response']}</s>"

    if not use_constraints:
        return (
            '[INST] Write an answer of A that includes the affirmative form of "would like".\n'
            "\n"
            "Dialog:\n"
            + dialog_and_answer
        )

    # Only the constraint template needs the rule table, so look it up lazily.
    wanted = example['nr'] if isinstance(example['nr'], list) else [example['nr']]
    rules = egp[egp['#'].isin(wanted)]
    constraints = "\n".join("- " + rules['SubCategory'] + " - " + rules['guideword'] + ": " + rules['Can-do statement'])
    return (
        "[INST] Continue the dialog with one turn and show all of these grammar skills in your response.\n"
        "Grammar skills:\n"
        f"{constraints}\n"
        "Dialog:\n"
        + dialog_and_answer
    )

# Serialise the first 500 records as JSON lines. Each record is extended in
# place with a 'text' field holding the fully formatted training string.
with open(preprossed_dataset_file, 'w') as f:
    for item in tqdm(data[:500]):
        item['text'] = formatting_func(item)
        f.write(f"{json.dumps(item)}\n")

### Load dataset

In [4]:
# Fixed seed so that the 80/20 train/test partition is the same on every run;
# without it the held-out examples change each time the cell is executed.
SPLIT_SEED = 42
dataset = load_dataset('json', data_files=preprossed_dataset_file, split='train', cache_dir=os.getenv('CACHE_DIR'))
train_test_split = dataset.train_test_split(test_size=0.2, seed=SPLIT_SEED)
train_dataset, test_dataset = train_test_split['train'], train_test_split['test']

## Load and prepare base model

In [5]:
# Load the base model with 4-bit NF4 quantisation; float16 is the compute
# dtype and double quantisation is switched off.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False,
    bnb_4bit_quant_type="nf4"
)

model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config, cache_dir=os.getenv('CACHE_DIR'), device_map="auto")
model.config.use_cache = False
model.config.pretraining_tp = 1
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, cache_dir=os.getenv('CACHE_DIR'))
tokenizer.padding_side = "right"

# Register a dedicated '[PAD]' token as the padding token. The earlier
# `pad_token = eos_token` assignment was overwritten immediately by this call,
# and re-assigning '[PAD]' afterwards was redundant, so both were removed.
tokenizer.add_special_tokens({'pad_token': '[PAD]'})
# The vocabulary grew by one token, so the embedding matrix has to grow too.
model.resize_token_embeddings(len(tokenizer))

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Embedding(32001, 4096)

### Inference with base model

In [22]:
# Sanity check: let the model complete one randomly chosen test example.
EOP = "[/INST]"
example = random.choice(test_dataset)

# Keep everything up to and including the 4 characters after "[/INST]"
# (space, newline, "A:"), so the model only has to produce the answer text.
prompt_end = example['text'].index(EOP) + len(EOP) + 4
eval_prompt = example['text'][:prompt_end]

model_input = tokenizer(eval_prompt, return_tensors="pt").to(device)
model.eval()
with torch.no_grad():
    token_ids = model.generate(**model_input, max_new_tokens=1024, pad_token_id=2)[0]
    output_text = tokenizer.decode(token_ids, skip_special_tokens=True)
print(output_text)

[INST] Write an answer of A that includes the affirmative form of "would like".

Dialog:
A: Good morning, sir. May I come in?
B: Good morning. Yes, please. Take a seat. I guess you want to open an account, right?
A: Yes.
B: Great. What account do you want to open? A checking account or a saving account? [/INST] 
A: I'd like to open a saving account.


## Fine-tuning

In [7]:
# Modules that receive LoRA adapters: the attention projections, the MLP
# projections and the output head.
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
    "lm_head",
]

peft_config = LoraConfig(
    r=64,
    lora_alpha=16,
    lora_dropout=0.1,
    target_modules=lora_target_modules,
    bias="none",
    task_type="CAUSAL_LM",
)

In [8]:
# Hyper-parameters for the fine-tuning run.
training_arguments = TrainingArguments(
    output_dir="../models/mistral_FT",
    # NOTE(review): max_steps below wins over this value - the recorded run
    # stopped at global_step=250 with epoch 5.0, not after 2 epochs.
    num_train_epochs=2,
    per_device_train_batch_size=8,
    gradient_accumulation_steps=1,
    optim="paged_adamw_32bit",
    #save_steps=25,
    logging_steps=20,
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=False,
    bf16=False,
    max_grad_norm=0.3,
    # Hard limit on optimisation steps (see the note on num_train_epochs).
    max_steps=250,
    # NOTE(review): presumably has no effect together with the "constant"
    # scheduler below - confirm, or use a scheduler that supports warm-up.
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="constant",
    report_to="wandb",
    run_name="gctg",
    #load_best_model_at_end=True,
    # Evaluate (and call compute_metrics) every 20 steps.
    evaluation_strategy="steps",
    eval_steps=20,
    per_device_eval_batch_size=20,
    # NOTE(review): presumably only relevant for best-model selection, which
    # is disabled while load_best_model_at_end stays commented out - confirm.
    metric_for_best_model="eval_loss",
    greater_is_better=False,
)

In [9]:
# Marker that ends the prompt part of every formatted example; the
# completion-only collator is given this marker as its response template.
response_template = "[/INST]"
collator = DataCollatorForCompletionOnlyLM(response_template=response_template, tokenizer=tokenizer)

In [10]:
def preprocess_logits_for_metrics(logits, labels):
    """Reduce logits to the argmax index along the last dimension.

    Args:
        logits: A tensor, or a tuple whose first element is the tensor to use.
        labels: Accepted for signature compatibility; not used.

    Returns:
        Tensor of argmax indices over the last dimension of the logits.
    """
    scores = logits[0] if isinstance(logits, tuple) else logits
    return scores.argmax(dim=-1)

def compute_metrics(eval_preds):
    """Qualitative evaluation hook: generate and score a few sampled answers.

    Three random examples are drawn from each of ``train_dataset`` and
    ``test_dataset``. For every example the prompt part of its ``'text'`` is
    fed to ``model.generate`` and the reference answer, the generated answer
    and the classifier score of the generated answer are printed.

    An earlier definition of ``compute_metrics`` in the same cell was shadowed
    by this one and never ran; it has been removed.

    Args:
        eval_preds: Predictions/labels handed over by the trainer. Ignored -
            the function generates its own outputs instead.

    Returns:
        ``{"metric": 1.0}`` - a constant placeholder, not a real measurement.
    """
    EOP = "[/INST]"
    print("EPOCH", "___" * 20)
    model.eval()
    for ds in [train_dataset, test_dataset]:
        for idx in RandomSampler(ds, num_samples=3):
            text = ds[idx]['text']
            # The prompt ends 4 characters after "[/INST]" (space, newline,
            # "A:"); everything after that is the reference answer.
            response_idx = text.index(EOP) + len(EOP) + 4
            eval_prompt = text[:response_idx]
            model_input = tokenizer(eval_prompt, return_tensors="pt").to(device)
            with torch.no_grad():
                # NOTE(review): pad_token_id=2 is hard-coded; presumably the
                # EOS id of this tokenizer - confirm.
                token_ids = model.generate(**model_input, max_new_tokens=1024, pad_token_id=2, repetition_penalty=1.5)[0]
                # decode() has no use for a device argument, so the former
                # device="cpu" keyword was dropped.
                output_text = tokenizer.decode(token_ids, skip_special_tokens=True)
            # The decoded output starts with the prompt; strip it by length.
            generated = output_text[response_idx:]
            print(f"Truth: {text[response_idx:]}")
            print(f"Generated: {generated}")
            print(f"Grammar score: {models.probe_model(classifier, [str(generated)])[0].item()}")
    return {"metric": 1.0}

In [11]:
# Assemble the supervised fine-tuning trainer from the pieces defined above.
trainer = SFTTrainer(
    # model, tokenizer and training configuration
    model=model,
    tokenizer=tokenizer,
    args=training_arguments,
    peft_config=peft_config,
    # data: the formatted prompt + answer lives in the "text" field
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    dataset_text_field="text",
    max_seq_length=512,
    packing=False,
    data_collator=collator,
    # evaluation hooks
    compute_metrics=compute_metrics,
    preprocess_logits_for_metrics=preprocess_logits_for_metrics,
    #neftune_noise_alpha=5,
)

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


In [12]:
# Run fine-tuning with reporting set to "wandb". The call is left as the
# cell's last expression so that the returned TrainOutput is displayed.
trainer.train()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mdo-gl[0m ([33mdomgla[0m). Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss,Validation Loss,Metric
20,2.1938,1.811004,1.0
40,1.6739,1.791012,1.0
60,1.3498,1.870736,1.0
80,1.0359,1.907026,1.0
100,0.8253,1.986304,1.0
120,0.3827,2.161965,1.0
140,0.3392,2.326213,1.0
160,0.2072,2.784266,1.0
180,0.1243,2.737197,1.0
200,0.1489,2.770288,1.0


EPOCH ____________________________________________________________
Truth:  Ugh, I don't think I would like that very much! I'm sure there are more interesting ways, maybe different types of steaks?</s>
Generated:  That reminds me...I would love for someone else in our family to make dinner tonight so we could have something different than what mom makes all the time
Grammar score: 0.014691396616399288
Truth:  Yeah I don't think I would like it but I bet the Canadian bands sure do. </s>
Generated:  Yeah but you would still have a lot more exposure if your songs were played in Canada than not being heard there
Grammar score: 0.004068496637046337
Truth:  Sure. We also would like to use the chairs from the lunchroom. Would that be OK?</s>
Generated:  Yes we would prefer if someone was there when we drop off the car as well
Grammar score: 0.1942264884710312
Truth:  I would like to watch it again</s>
Generated:  Yeah, definitely!
Grammar score: 0.0016454245196655393
Truth:  Well i have the b

TrainOutput(global_step=250, training_loss=0.679188768863678, metrics={'train_runtime': 361.8651, 'train_samples_per_second': 5.527, 'train_steps_per_second': 0.691, 'total_flos': 1.475172446611968e+16, 'train_loss': 0.679188768863678, 'epoch': 5.0})

In [13]:
# Persist the fine-tuned model. NOTE(review): presumably written to the
# output_dir of training_arguments ("../models/mistral_FT") - confirm.
trainer.save_model()

