# Exp018: Supervised fine-tuning
This experiment instruction-fine-tunes the model on dialog extracts from the dataset that already demonstrate a grammar skill, so that it learns to satisfy a single grammar constraint at a time.

In [1]:
#from datasets import load_dataset
from dotenv import load_dotenv
load_dotenv()
import os

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from datasets import Dataset
import pickle
import numpy as np

import sys
sys.path.append(f'../source')
import helpers

In [2]:
# Experiment configuration: corpus pickle location, base checkpoint and compute device.
out_file = '../data/corpus_classification.pkl'
model_name = "mistralai/Mistral-7B-Instruct-v0.2"
# Use the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [3]:
# Load the grammar-rule table through the project helper. formatting_func filters it
# on the '#' column and reads 'SubCategory', 'guideword' and 'Can-do statement'.
egp = helpers.get_egp()

In [4]:
# The file holds three objects pickled back to back; they have to be read in
# exactly this order.
# NOTE(review): pickle.load can execute arbitrary code - only load files you trust.
with open(out_file, 'rb') as f:
    all_hit_indices = pickle.load(f)  # indexed with 616 in the next cell (presumably a rule number - confirm)
    all_hit_sentences = pickle.load(f)  # not used by the cells shown in this notebook section
    extracts = pickle.load(f)  # extracts[idx][0] / extracts[idx][1] become "text" / "label"

In [5]:
# One record per index stored under key 616: element 0 of the extract is kept as
# "text" (the dialog turns consumed by formatting_func) and element 1 as "label".
data = [
    dict(text=extracts[hit_idx][0], label=extracts[hit_idx][1])
    for hit_idx in all_hit_indices[616]
]
dataset = Dataset.from_list(data)

In [12]:
# 4-bit NF4 quantisation settings with double quantisation and bfloat16 compute.
# NOTE(review): this cell needs the bitsandbytes package (it raised
# PackageNotFoundError when executed), and bnb_config is not passed to
# from_pretrained in the cells shown here - install the package and wire it in,
# or drop the cell.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

PackageNotFoundError: No package metadata was found for bitsandbytes

In [6]:
# Base model in half precision; weight placement is left to device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    cache_dir=os.getenv('CACHE_DIR'),
    torch_dtype=torch.float16,
    device_map="auto",
)

# Training tokenizer: pads on the left and wraps every sample in BOS ... EOS.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    cache_dir=os.getenv('CACHE_DIR'),
    add_bos_token=True,
    add_eos_token=True,
    padding_side="left",
)
# Reuse EOS as the padding token so that padding="max_length" can be used later.
tokenizer.pad_token = tokenizer.eos_token

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [13]:
def formatting_func(example, nrs=(616,)):
    """Build the instruction prompt for one dialog example.

    Args:
        example: Mapping whose "text" entry holds the dialog utterances in order.
        nrs: Values of the '#' column of the global `egp` table that select the
            grammar skills to list in the prompt. Any iterable is accepted.

    Returns:
        The prompt string: the instruction plus one bullet per selected rule inside
        an ``[INST] ... [/INST]`` block, followed by the dialog with alternating
        "A: " / "B: " speaker prefixes.
    """
    rules = egp[egp['#'].isin(list(nrs))]
    # Element-wise string concatenation over the columns gives one bullet per rule.
    bullets = "- " + rules['SubCategory'] + " - " + rules['guideword'] + ": " + rules['Can-do statement']
    # Join with "\n" rather than os.linesep: the template below already uses "\n",
    # so the prompt keeps one consistent line ending on every platform.
    constraints = "\n".join(bullets)
    context = "\n".join(
        ("A" if i % 2 == 0 else "B") + ": " + utt
        for i, utt in enumerate(example["text"])
    )
    # NOTE(review): example["label"] is not part of the returned text. If this
    # string is the full supervised training sample, the target turn is presumably
    # missing - confirm before training.

    return f"""[INST] Continue the dialog with one turn and show all of these grammar skills in your response.
Grammar skills:
{constraints}[/INST]
Dialog:
{context}"""

In [8]:
def generate_and_tokenize_prompt(prompt, max_length=512):
    """Tokenize one example into a fixed-length causal-LM training sample.

    Args:
        prompt: A dataset record; it is passed to `formatting_func` to build the text.
        max_length: Length every sample is truncated or padded to.

    Returns:
        The tokenizer output with an added "labels" entry. Labels equal the input
        ids on real tokens and -100 on padded positions, so that a loss using the
        usual -100 ignore index does not train on padding.
    """
    result = tokenizer(
        formatting_func(prompt),
        truncation=True,
        max_length=max_length,
        padding="max_length",
    )
    attention_mask = result.get("attention_mask")
    if attention_mask is None:
        # No mask available: keep the plain copy of the input ids.
        result["labels"] = result["input_ids"].copy()
    else:
        # The pad token is the EOS token, so padding cannot be recognised by token
        # id without also masking the real EOS; the attention mask is used instead.
        result["labels"] = [
            token_id if keep else -100
            for token_id, keep in zip(result["input_ids"], attention_mask)
        ]
    return result

In [9]:
# Tokenize the whole dataset: every record goes through
# generate_and_tokenize_prompt with its default max_length.
train = dataset.map(generate_and_tokenize_prompt)

Map:   0%|          | 0/85 [00:00<?, ? examples/s]

In [10]:
# Separate tokenizer for generation: unlike the training tokenizer it sets neither
# add_eos_token nor left padding, presumably so that the prompt is not closed with
# an EOS token before the model continues it - confirm.
eval_tokenizer = AutoTokenizer.from_pretrained(model_name, add_bos_token=True, cache_dir=os.getenv('CACHE_DIR'))

In [21]:
# Smoke test: let the loaded model continue the prompt built from the example at index 5.
model_input = eval_tokenizer(formatting_func(data[5]), return_tensors="pt").to(device)
model.eval()  # switch to inference mode before generating
with torch.no_grad():
    generated_ids = model.generate(
        **model_input,
        max_new_tokens=32,
        repetition_penalty=1.15,
        # Passing the EOS id explicitly is the same value generate() falls back to,
        # but it avoids the "Setting `pad_token_id` to `eos_token_id`" warning.
        pad_token_id=eval_tokenizer.eos_token_id,
    )
print(eval_tokenizer.decode(generated_ids[0], skip_special_tokens=True))

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[INST] Continue the dialog with one turn and show all of these grammar skills in your response.
Grammar skills:
- would - FORM: AFFIRMATIVE WITH 'LIKE': Can use the affirmative form with 'like'. [/INST]
Dialog:
A: I didn't know that. So they aren't like US soap operas?
B: They are and in Spain they are called culebrones
A: Neat! Are they just in Spanish speaking countries?
B: No they have them in Canada and there they are called téléroman or serials.
A: Wow, I had no idea. In the UK, we call them continuity dramas. Would you say that's
