<a href="https://colab.research.google.com/github/stewart-lab/kmGPT/blob/fine-tuning/Unsloth_Lora_Fine_Tuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install the fine-tuning stack into the current runtime (run once per fresh kernel).
# NOTE(review): none of these installs is version-pinned, so a later re-run may
# resolve to different releases than the ones this notebook was developed with.
!pip install accelerate
!pip install peft
!pip install wandb
!pip install trl
!pip install bitsandbytes
!pip install scikit-learn
# NOTE(review): the next four commands install four different Unsloth extras
# (cu118 / cu121, each with and without "ampere") one after another. Presumably
# only the extra matching this runtime's CUDA build and GPU is needed, and the
# later installs override the earlier ones — confirm and keep a single line.
!pip install "unsloth[cu118-torch230] @ git+https://github.com/unslothai/unsloth.git"
!pip install "unsloth[cu121-torch230] @ git+https://github.com/unslothai/unsloth.git"
!pip install "unsloth[cu118-ampere-torch230] @ git+https://github.com/unslothai/unsloth.git"
!pip install "unsloth[cu121-ampere-torch230] @ git+https://github.com/unslothai/unsloth.git"
!pip install xformers

Collecting unsloth[cu118-torch230]@ git+https://github.com/unslothai/unsloth.git
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-5iuhp51_/unsloth_029671f8e8254e86b3a3194f8905d879
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-5iuhp51_/unsloth_029671f8e8254e86b3a3194f8905d879
  Resolved https://github.com/unslothai/unsloth.git to commit 2f2b478868f63b66aaaa93db66ab3d811cddc95e
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting xformers@ https://download.pytorch.org/whl/cu118/xformers-0.0.26.post1%2Bcu118-cp310-cp310-manylinux2014_x86_64.whl (from unsloth[cu118-torch230]@ git+https://github.com/unslothai/unsloth.git)
  Using cached https://download.pytorch.org/whl/cu118/xformers-0.0.26.post1%2Bcu118-cp310-cp310-manyli

In [2]:
# FlashAttention is installed separately; the model-loading cell below requests
# attn_implementation = 'flash_attention_2'.
!pip install flash-attn



In [None]:
from accelerate import Accelerator
from peft import get_peft_config, PeftModel, PeftConfig, get_peft_model, LoraConfig
import wandb
import transformers
import torch
import glob
import pandas as pd
from tqdm import tqdm
import numpy as np
import os
import random
from unsloth import FastLanguageModel
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
from torch import nn
import sys
import gc
from transformers import AdamW
from accelerate import notebook_launcher
from sklearn.model_selection import train_test_split
from accelerate import DistributedDataParallelKwargs
import time
import re
from transformers import get_cosine_schedule_with_warmup
from trl import SFTTrainer, DataCollatorForCompletionOnlyLM
from transformers import TrainingArguments, BitsAndBytesConfig
import accelerate
import json
from peft import IA3Config, IA3Model, LoraConfig
import jinja2
import math
import bitsandbytes as bnb
from datasets import Dataset
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import math
from trl import setup_chat_format
from peft import prepare_model_for_kbit_training

# From this Gist: https://gist.github.com/ihoromi4/b681a9088f348942b01711f251e5f964
def seed_everything(seed: int) -> None:
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices),
    exports ``PYTHONHASHSEED`` and puts cuDNN into deterministic mode.

    Args:
        seed: The integer seed applied to all generators.
    """
    random.seed(seed)
    # Only affects hash randomisation of subprocesses started after this call;
    # the running interpreter has already fixed its own hash seed.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not just the current one; silently a no-op without CUDA.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may pick different kernels from run to run, which breaks
    # reproducibility, so it has to be switched off (it was previously enabled here).
    torch.backends.cudnn.benchmark = False

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


In [None]:
!huggingface-cli login --token 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

In [None]:
!wandb login 4a376fd0ab1c0901b9d9886d0734a88b4794a7fd

In [None]:
class config:
    """Namespace of constants shared by the cells of this notebook."""

    # ---- General --------------------------------------------------------
    device_type = "gpus"
    model = "unsloth/Phi-3-mini-4k-instruct"

    # ---- Training -------------------------------------------------------
    max_seq_length = 2048
    trust = True  # forwarded as trust_remote_code when the model is loaded

    # ---- Prompt templates -----------------------------------------------
    # Each hypothesis is filled with two of the three a/b/c terms of a row.
    ab_hypothesis = (
        "There exists an interaction between "
        "the disease {a_term} and the gene {b_term}."
    )
    bc_hypothesis = (
        "There exists an interaction between "
        "the drug {c_term} and the gene {b_term}."
    )
    ac_hypothesis = (
        "The drug {c_term} has an interaction "
        "with the disease {a_term}."
    )

    # Instruction line placed in every prompt.
    instr = (
        "Classify this abstract as either 0 (Not Relevant) or 1 (Relevant) "
        "for evaluating the provided hypothesis."
    )

In [None]:
# Load the base model named in config.model together with its tokenizer.
# load_in_4bit = True requests 4-bit loading, and the sequence length and
# trust_remote_code flag come from the config class above.
# NOTE(review): attn_implementation = 'flash_attention_2' presumably requires the
# flash-attn package installed earlier and a GPU that supports it — confirm.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = config.model,
    max_seq_length = config.max_seq_length,
    load_in_4bit = True,
    trust_remote_code = config.trust,
    attn_implementation = 'flash_attention_2',
    device_map = "auto",
)

# Wrap the loaded model with LoRA adapters on the listed projection modules.
# NOTE(review): r = 64 here, while the W&B run name further down says
# "RSLora 32" — confirm which rank is intended.
# NOTE(review): use_rslora = True presumably selects rank-stabilised LoRA
# scaling, which changes how lora_alpha = 16 relates to r — see the PEFT docs.
model = FastLanguageModel.get_peft_model(
    model,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    r = 64,
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",
    use_rslora = True,
    loftq_config = None
)

In [None]:
def train_ans_prompt(hyp, abstract, instr, label, cot) -> str:
    """Build the full training text: prompt, score and explanation.

    The five labelled sections are emitted one per line, in the order
    Abstract, Hypothesis, Instructions, Score, Explanation.
    """
    sections = (
        f"Abstract: {abstract}",
        f"Hypothesis: {hyp}",
        f"Instructions: {instr}",
        f"Score: {label}",
        f"Explanation: {cot}",
    )
    return "\n".join(sections)

def test_ans_prompt(hyp, abstract, instr, label) -> str:
	return f"Abstract: {abstract}\nHypothesis: {hyp}\nInstructions: {instr}\nScore: {label}"

def eval_ans_prompt(hyp, abstract, instr) -> str:
    """Build the generation prompt, ending right where the score is expected.

    The text stops after "Score: " (including the trailing space) so that the
    next generated token is the predicted label.
    """
    sections = (
        f"Abstract: {abstract}",
        f"Hypothesis: {hyp}",
        f"Instructions: {instr}",
        "Score: ",
    )
    return "\n".join(sections)

In [None]:
# Read the tab-separated training and test tables from paths relative to the
# notebook's working directory. Later cells read the columns a_term, b_term,
# c_term, abstract and label from both frames, plus cot from the training frame.
train = pd.read_csv("filtered_synthetic_train.tsv", sep="\t")
test = pd.read_csv("test.tsv", sep="\t")

In [None]:
def processRowTrainText(row, prompt_fn):
    """Render one training row into its full supervised text.

    The hypothesis template is chosen by which of the three terms is missing:
    a missing ``a_term`` selects the B-C template, a missing ``b_term`` the
    A-C template and a missing ``c_term`` the A-B template (checked in that order).

    Args:
        row: Mapping (e.g. a DataFrame row) with the keys ``a_term``, ``b_term``,
            ``c_term``, ``abstract``, ``label`` and ``cot``.
        prompt_fn: Callable taking ``(hypothesis, abstract, instr, label, cot)``.

    Returns:
        Whatever ``prompt_fn`` returns for the row.

    Raises:
        ValueError: If none of the three terms is missing, so no hypothesis
            template applies.
    """
    if pd.isnull(row["a_term"]):
        hypothesis = config.bc_hypothesis.format(c_term=row["c_term"], b_term=row["b_term"])
    elif pd.isnull(row["b_term"]):
        hypothesis = config.ac_hypothesis.format(c_term=row["c_term"], a_term=row["a_term"])
    elif pd.isnull(row["c_term"]):
        hypothesis = config.ab_hypothesis.format(a_term=row["a_term"], b_term=row["b_term"])
    else:
        # Without this branch `hypothesis` stayed unbound and the row failed
        # with an opaque UnboundLocalError.
        raise ValueError(
            "Row has a_term, b_term and c_term all set; exactly one must be missing "
            "to select a hypothesis template."
        )
    return prompt_fn(hypothesis, row["abstract"], config.instr, int(row["label"]), row["cot"])

In [None]:
def processRowTestText(row, prompt_fn):
    """Render one test row into its text with the gold label.

    The hypothesis template is chosen by which of the three terms is missing:
    a missing ``a_term`` selects the B-C template, a missing ``b_term`` the
    A-C template and a missing ``c_term`` the A-B template (checked in that order).

    Args:
        row: Mapping (e.g. a DataFrame row) with the keys ``a_term``, ``b_term``,
            ``c_term``, ``abstract`` and ``label``.
        prompt_fn: Callable taking ``(hypothesis, abstract, instr, label)``.

    Returns:
        Whatever ``prompt_fn`` returns for the row.

    Raises:
        ValueError: If none of the three terms is missing, so no hypothesis
            template applies.
    """
    if pd.isnull(row["a_term"]):
        hypothesis = config.bc_hypothesis.format(c_term=row["c_term"], b_term=row["b_term"])
    elif pd.isnull(row["b_term"]):
        hypothesis = config.ac_hypothesis.format(c_term=row["c_term"], a_term=row["a_term"])
    elif pd.isnull(row["c_term"]):
        hypothesis = config.ab_hypothesis.format(a_term=row["a_term"], b_term=row["b_term"])
    else:
        # Without this branch `hypothesis` stayed unbound and the row failed
        # with an opaque UnboundLocalError.
        raise ValueError(
            "Row has a_term, b_term and c_term all set; exactly one must be missing "
            "to select a hypothesis template."
        )
    return prompt_fn(hypothesis, row["abstract"], config.instr, int(row["label"]))

In [None]:
def processRowPrompt(row, prompt_fn):
    """Render one row into the label-free prompt used for generation.

    The hypothesis template is chosen by which of the three terms is missing:
    a missing ``a_term`` selects the B-C template, a missing ``b_term`` the
    A-C template and a missing ``c_term`` the A-B template (checked in that order).

    Args:
        row: Mapping (e.g. a DataFrame row) with the keys ``a_term``, ``b_term``,
            ``c_term`` and ``abstract``.
        prompt_fn: Callable taking ``(hypothesis, abstract, instr)``.

    Returns:
        Whatever ``prompt_fn`` returns for the row.

    Raises:
        ValueError: If none of the three terms is missing, so no hypothesis
            template applies.
    """
    if pd.isnull(row["a_term"]):
        hypothesis = config.bc_hypothesis.format(c_term=row["c_term"], b_term=row["b_term"])
    elif pd.isnull(row["b_term"]):
        hypothesis = config.ac_hypothesis.format(c_term=row["c_term"], a_term=row["a_term"])
    elif pd.isnull(row["c_term"]):
        hypothesis = config.ab_hypothesis.format(a_term=row["a_term"], b_term=row["b_term"])
    else:
        # Without this branch `hypothesis` stayed unbound and the row failed
        # with an opaque UnboundLocalError.
        raise ValueError(
            "Row has a_term, b_term and c_term all set; exactly one must be missing "
            "to select a hypothesis template."
        )
    return prompt_fn(hypothesis, row["abstract"], config.instr)

In [None]:
# Add the full training text and the label-free prompt as new columns, then
# turn the frame into a Dataset. `train` is rebound from a DataFrame to a
# Dataset, so this cell cannot be re-run without re-loading the TSV first.
train = Dataset.from_pandas(
    train.assign(
        text=train.apply(processRowTrainText, axis=1, args=(train_ans_prompt,)),
        prompt=train.apply(processRowPrompt, axis=1, args=(eval_ans_prompt,)),
    )
)

In [None]:
# Add the labelled text and the label-free prompt as new columns, then turn the
# frame into a Dataset. `test` is rebound from a DataFrame to a Dataset, so this
# cell cannot be re-run without re-loading the TSV first.
test = Dataset.from_pandas(
    test.assign(
        text=test.apply(processRowTestText, axis=1, args=(test_ans_prompt,)),
        prompt=test.apply(processRowPrompt, axis=1, args=(eval_ans_prompt,)),
    )
)

In [None]:
# Spot-check one rendered training example (row 69) by fetching the row first.
print(train[69]["text"])

In [None]:
# Spot-check the label-free prompt of the same training row (row 69).
print(train[69]["prompt"])

In [None]:
# Spot-check one rendered test example (row 69) by fetching the row first.
print(test[69]["text"])

In [None]:
# Spot-check the label-free prompt of the same test row (row 69).
print(test[69]["prompt"])

# Training

In [None]:
# Start the Weights & Biases run that receives all training and evaluation logs.
# NOTE(review): the run name says "RSLora 32" but the adapter is created with
# r = 64 in the model cell — confirm which is right and align the two.
wandb.init(project="kmGPT", entity = "morgridge", group = "Fine Tuning", name = "Unslothed RSLora 32 & Neftune & (Filtered Labels + CoT) & Phi-3", reinit=True)

In [None]:
from transformers.integrations import WandbCallback
class LLMSampleCB(WandbCallback):
    """W&B callback that scores generated 0/1 labels after every evaluation.

    After each Trainer evaluation it generates one token per prompt of the
    held-out dataset, reads that token as the predicted label, computes
    accuracy and weighted precision / recall / F1 against the gold labels,
    logs them to W&B and prints a summary.

    Attributes:
        test: Dataset providing the ``prompt`` and ``label`` columns.
        y: Gold labels as a tensor.
        model: The trainer's model, used for generation.
        tokenizer: The trainer's tokenizer.
    """

    def __init__(self, trainer, test_dataset):
        """Capture the model, tokenizer and evaluation data.

        Args:
            trainer: Trainer whose ``model`` and ``tokenizer`` are used to generate.
            test_dataset: Dataset with ``prompt`` and ``label`` columns.
        """
        super().__init__()
        self.test = test_dataset
        self.y = torch.tensor(self.test["label"])
        self.model, self.tokenizer = trainer.model, trainer.tokenizer

    def get_metrics(self):
        """Generate a label for every prompt and score the predictions.

        A generation whose new text does not end in ``0`` or ``1`` is counted
        as a misclassification (the opposite of the gold label is recorded).

        Returns:
            Tuple ``(accuracy, precision, recall, f1)``; the last three are
            weighted averages.
        """
        # Read the columns once; indexing the dataset column inside the loop
        # would rebuild the whole column on every iteration.
        prompts = self.test["prompt"]
        labels = self.y.tolist()
        y_hat = []

        # Use the objects captured in __init__ rather than notebook globals,
        # so the callback works with whatever trainer/dataset it was given.
        FastLanguageModel.for_inference(self.model)
        try:
            for prompt, label in zip(tqdm(prompts), labels):
                prompt_ids = self.tokenizer(prompt, return_tensors="pt")["input_ids"]
                out = self.model.generate(prompt_ids.cuda(), max_new_tokens = 1)[-1]
                # Decode only the newly generated token(s), not the echoed prompt.
                new_text = self.tokenizer.decode(
                    out[prompt_ids.shape[-1]:], skip_special_tokens=True
                ).strip()
                if new_text[-1:] in ("0", "1"):
                    score = int(new_text[-1])
                else:
                    # Unusable output: record the wrong label as a plain int
                    # (previously a bare `except:` appended a tensor here).
                    score = 1 - label
                y_hat.append(score)
        finally:
            # This runs in the middle of training; hand the model back in
            # training mode so the following epochs are not run in inference mode.
            FastLanguageModel.for_training(self.model)

        y_hat = torch.tensor(y_hat)

        acc = accuracy_score(self.y, y_hat)
        prec = precision_score(self.y, y_hat, average='weighted')
        recall = recall_score(self.y, y_hat, average='weighted')
        f1 = f1_score(self.y, y_hat, average='weighted')

        return acc, prec, recall, f1

    def on_evaluate(self, args, state, control,  **kwargs):
        """Log and print the generation-based metrics after each evaluation."""
        super().on_evaluate(args, state, control, **kwargs)
        acc, prec, recall, f1 = self.get_metrics()
        # One log call keeps the four metrics on the same W&B step.
        self._wandb.log({
            "Running Validation Accuracy": acc,
            "Running Validation Precision": prec,
            "Running Validation Recall": recall,
            "Running Validation F1": f1,
        })
        # `state` is the trainer state passed to the hook; no global needed.
        epoch = math.ceil(state.epoch)

        print(f"Epoch {epoch}:\n\tAccuracy: {acc:.3f}\n\tPrecision: {prec:.3f}\n\tRecall: {recall:.3f}\n\tF-1 Score: {f1:.3f}")


In [None]:
# Hyper-parameters for the fine-tuning run; metrics are reported to W&B and
# checkpoints are written under ./checkpoints.
training_args = TrainingArguments(
    output_dir = "checkpoints",
    report_to = "wandb",
    # Optimisation schedule: cosine decay from 2e-4 with a 3% warm-up.
    learning_rate = 2e-4,
    warmup_ratio = 0.03,
    lr_scheduler_type = "cosine",
    num_train_epochs = 5,
    # 1 sequence per device x 8 accumulation steps = 8 sequences per optimizer step per device.
    per_device_train_batch_size = 1,
    gradient_accumulation_steps = 8,
    # NOTE(review): bf16 presumably needs a GPU with bfloat16 support — confirm for the target runtime.
    bf16 = True,
    optim = "paged_adamw_32bit",
    # Evaluate and checkpoint once per epoch; both strategies must match for
    # load_best_model_at_end to work.
    evaluation_strategy="epoch",
    save_strategy = "epoch",
    logging_steps = 1,
    do_eval=True,
    # No metric_for_best_model is given, so "best" presumably falls back to the
    # Trainer default (evaluation loss) — confirm that is the intended criterion.
    load_best_model_at_end = True,
    save_total_limit = 2,
    # NEFTune noise (also mentioned in the W&B run name).
    neftune_noise_alpha = 5,
    weight_decay = 0.01,
)

In [None]:
# Build the supervised fine-tuning trainer over the rendered "text" column.
# Examples are packed (packing = True) into fixed-length training sequences.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    args = training_args,
    train_dataset = train,
    eval_dataset = test,
    dataset_text_field = "text",
    # Pack to the same context length the model was loaded with. This was not
    # passed before, so the trainer fell back to its own default length
    # instead of config.max_seq_length.
    max_seq_length = config.max_seq_length,
    packing = True,
)

In [None]:
# Register the generation-based metrics callback so its on_evaluate hook runs
# after every evaluation.
# NOTE(review): training_args already sets report_to = "wandb", which presumably
# installs the stock W&B callback too — confirm that having both is intended.
wandb_callback = LLMSampleCB(trainer, test)
trainer.add_callback(wandb_callback)

In [None]:
# Run fine-tuning with the arguments, datasets and callback configured above.
trainer.train()

In [None]:
# Qualitative check: switch the model to Unsloth inference mode and generate up
# to 100 new tokens for the first test prompt.
FastLanguageModel.for_inference(model)
prompt = test["prompt"][0]
prompt_ids = tokenizer(prompt, return_tensors="pt")["input_ids"]
# .cuda() requires a CUDA device to be available.
out = model.generate(prompt_ids.cuda(), max_new_tokens = 100)
# Decodes the first returned sequence in full (presumably prompt plus
# continuation, special tokens included, since nothing is skipped).
response = tokenizer.decode(out[0])

In [None]:
# Show the decoded sample generation produced by the previous cell.
print(response)

In [None]:
# Final pass over the held-out set: generate one token per prompt and read it
# as the predicted 0/1 label.
# The columns are read once up front; indexing test["prompt"] inside the loop
# would rebuild the whole column on every iteration.
eval_prompts = test["prompt"]
eval_labels = test["label"]
y_hat = []
n_unparsed = 0
with torch.inference_mode(), torch.cuda.amp.autocast():
    for prompt, label in zip(tqdm(eval_prompts), eval_labels):
        prompt_ids = tokenizer(prompt, return_tensors="pt")["input_ids"]
        out = trainer.model.generate(prompt_ids.cuda(), max_new_tokens = 1)[-1]
        # Decode only the newly generated token(s), not the echoed prompt.
        new_text = tokenizer.decode(out[prompt_ids.shape[-1]:], skip_special_tokens=True).strip()
        if new_text[-1:] in ("0", "1"):
            score = int(new_text[-1])
        else:
            # A non-label output used to abort the whole pass with ValueError.
            # Count it as a misclassification instead, matching what the
            # LLMSampleCB callback does during training.
            n_unparsed += 1
            score = 1 - int(label)
        y_hat.append(score)

if n_unparsed:
    print(f"{n_unparsed} generation(s) were not a 0/1 label and were counted as errors.")

In [None]:
# Gold labels and predictions as tensors for the metric cells below.
y, y_hat = torch.tensor(test["label"]), torch.tensor(y_hat)

In [None]:
# Log the final weighted metrics in a single call so that all four land on the
# same W&B step (separate wandb.log calls each advance the step counter).
wandb.log({
    "Validation Accuracy": accuracy_score(y, y_hat),
    "Validation Precision": precision_score(y, y_hat, average='weighted'),
    "Validation Recall": recall_score(y, y_hat, average='weighted'),
    "Validation F1-Score": f1_score(y, y_hat, average='weighted'),
})

In [None]:
# Accuracy of the predictions against the gold labels (shown as the cell output).
accuracy_score(y, y_hat)

In [None]:
# Precision for the positive class. scikit-learn expects (y_true, y_pred); with
# the arguments swapped, as before, this cell actually displayed recall.
precision_score(y, y_hat)

In [None]:
# Recall for the positive class. scikit-learn expects (y_true, y_pred); with
# the arguments swapped, as before, this cell actually displayed precision.
recall_score(y, y_hat)

In [None]:
# F1 for the positive class, passed as (y_true, y_pred) like the other metric
# cells. Binary F1 is symmetric in its arguments, so the value is unchanged.
f1_score(y, y_hat)

In [None]:
# Build the confusion-matrix plot from the gold labels and predictions, then
# log it to the current W&B run.
confusion_plot = wandb.plot.confusion_matrix(
    y_true=y.tolist(),
    preds=y_hat.tolist(),
    class_names=["Irrelevant", "Relevant"],
    title="Relevance Confusion Matrix",
)
wandb.log({"Confusion Matrix": confusion_plot})

In [None]:
# Close the W&B run started by wandb.init earlier in the notebook.
wandb.finish()