In [1]:
import os, torch, logging
from datasets import load_dataset, load_metric
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, HfArgumentParser, TrainingArguments, pipeline
from peft import LoraConfig, PeftModel
from trl import SFTTrainer
import transformers
import pandas as pd

In [4]:
# Load the tab-separated question/answer file into a DataFrame.
# The file's leading unnamed column is read in as "Unnamed: 0".
data = pd.read_csv("final_data1.tsv", delimiter="\t")

# Preview the first rows as the cell's output.
data.head()

Unnamed: 0,Question,Answer
0,What is the key challenge with full fine-tunin...,"Full fine-tuning of large models like GPT-3, w..."
1,What is Low-Rank Adaptation (LoRA)?,LoRA is a method that freezes the pre-trained ...
2,How does LoRA compare to full fine-tuning in t...,LoRA can reduce the number of trainable parame...
3,What is the impact of LoRA on inference latency?,LoRA introduces no additional inference latenc...
4,Can LoRA be combined with other adaptation met...,"Yes, LoRA is orthogonal to many prior methods ..."


In [5]:
# Build the training text for every row: an [INST]-wrapped prompt containing the
# question, followed by the answer and a closing </s> marker.
# NOTE(review): there is no space between the question and "[/INST]" - confirm this
# matches the prompt format used at inference time.
instruction = "<s>[INST] Answer the following question: "
prompt_part = instruction + data["Question"] + "[/INST] "
data["text"] = prompt_part + data["Answer"] + " </s>"

# Keep just the assembled 'text' column; every other column is discarded.
# Re-running this cell without reloading `data` fails, because 'Question' and
# 'Answer' no longer exist after this line.
data = data[["text"]]

In [6]:
import pyarrow as pa
from datasets import Dataset, DatasetDict

# Convert the prepared DataFrame into a Hugging Face Dataset through the documented
# `Dataset.from_pandas` constructor instead of passing a raw pyarrow table to
# `Dataset(...)`. The index is reset first and is not stored as a column.
training_data = Dataset.from_pandas(data.reset_index(drop=True), preserve_index=False)

In [None]:
# Hub identifier of the checkpoint to load, and the name chosen for the fine-tuned model.
base_model_name = "likenneth/honest_llama2_chat_7B"
refined_model = "honest-llama-neuralearn-qlora-ft"

# 4-bit NF4 quantization settings with bfloat16 compute and double quantization off.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=False,
)

# Alternative 8-bit settings; defined here but not passed to anything in this cell.
quant_8bits = BitsAndBytesConfig(load_in_8bit=True)

# Tokenizer for the base checkpoint. The end-of-sequence token doubles as the padding
# token, and padding goes on the right (noted by the author as a fix for fp16).
llama_tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
llama_tokenizer.padding_side = "right"
llama_tokenizer.pad_token = llama_tokenizer.eos_token

# Load the base model with the 4-bit config, letting `device_map="auto"` decide placement.
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name, quantization_config=quant_config, device_map="auto"
)

# Adjust the loaded model's config: set pretraining_tp to 1 and switch use_cache off.
base_model.config.pretraining_tp = 1
base_model.config.use_cache = False