<a href="https://colab.research.google.com/github/prane-eth/AI_projects/blob/main/projects/LLM_fine-tuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Fine-tuning a language model

In [48]:
%%capture
# Install the notebook's dependencies; the %%capture cell magic above keeps
# pip's output out of the notebook.
# General-purpose packages used by the data-generation and dataset cells.
%pip install pandas groq python-dotenv datasets
# Unsloth is installed directly from its GitHub repository.
%pip install 'unsloth @ git+https://github.com/unslothai/unsloth.git'
# --no-deps installs exactly these packages without resolving their own
# dependencies; xformers is capped below 0.0.26.
%pip install --no-deps 'xformers<0.0.26' trl peft accelerate bitsandbytes

### Generate synthetic data for fine-tuning
**Data generation using an LLM**: a large model such as Llama 3 (70B) generates the training data, which is then used to fine-tune a small model such as Phi-3 Mini (3.8B).

In [49]:
import os
import re
import pandas as pd
from transformers import TextStreamer
from groq import Groq

# Directory that caches the generated training data between runs.
folder = 'datasets'
# exist_ok=True makes the call idempotent and avoids the check-then-create race
# of testing os.path.exists() first.
os.makedirs(folder, exist_ok=True)

# CSV file holding the instruction/output pairs used for fine-tuning.
data_filename = os.path.join(folder, 'customer_support_bot_finetune_data.csv')

# Reuse the cached CSV when it exists; otherwise ask the LLM to generate it once.
if os.path.exists(data_filename):
	with open(data_filename, 'r', encoding='utf-8') as file:
		csv_text = file.read()
else:
	client = Groq(
		api_key=os.getenv('GROQ_API_KEY'),
	)

	lines = 100
	# Adjacent string literals are joined verbatim, so each fragment has to
	# carry its own separating space (the original produced "output.Include").
	prompt = (
		'Generate high-quality data for fine-tuning in csv for customer support chatbot'
		f' for an ecommerce platform in {lines} lines of data. fields: instruction, output. '
		'Include the csv file text in triple quotes ```. '
		'response should include no other text.'
	)
	chat_completion = client.chat.completions.create(
		messages=[{ 'role': 'user', 'content': prompt }],
		model='llama3-70b-8192',
	)

	response = chat_completion.choices[0].message.content
	if not response:
		raise SystemExit('No response from the API.')

	# Trailing whitespace would hide an existing closing fence from the
	# endswith() check below.
	response = response.strip()
	# A reply that lacks the closing fence gets one so the pattern can match.
	if not response.endswith('```'):
		response += '```'

	# Take the text between the first pair of ``` fences. The optional group
	# consumes the remainder of the opening-fence line (for example the `csv`
	# in "```csv"), so a language tag never becomes the first CSV row.
	match = re.search(r'```(?:[\w+-]*[ \t]*\r?\n)?(.*?)```', response, re.DOTALL)
	if match:
		csv_text = match.group(1).strip()
		# Cache the CSV so later runs skip the API call.
		with open(data_filename, 'w', encoding='utf-8') as file:
			file.write(csv_text)
	else:
		# Show the raw reply to make the failure diagnosable.
		print(response)
		raise SystemExit('No data found in the response.')


# Parse the cached CSV into a DataFrame and report how many rows it contains.
training_data = pd.read_csv(data_filename)
row_count = len(training_data)
print(f'Data size: {row_count}')

# Last expression of the cell: preview the first rows.
training_data.head()

Data size: 56


Unnamed: 0,instruction,output
0,What is the status of my order?,Your order is currently being processed. Pleas...
1,I want to return my item,Please contact our customer service team to in...
2,I forgot my password,No worries! Click on the 'Forgot Password' lin...
3,I want to cancel my order,We're sorry to hear that. Please contact our c...
4,Where is my order?,Tracking information will be sent to you via e...


### Prepare the model for fine-tuning

In [50]:
from unsloth import FastLanguageModel
import torch

# Sequence-length limit passed to the model loader here and to the trainer later.
max_seq_length = 2048

# Options for loading the pretrained checkpoint and its tokenizer.
base_model_options = {
    'model_name': 'unsloth/Phi-3-mini-4k-instruct',
    'max_seq_length': max_seq_length,
    # None for auto-detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
    'dtype': None,
    # 4-bit quantization to reduce memory usage
    'load_in_4bit': True,
}
model, tokenizer = FastLanguageModel.from_pretrained(**base_model_options)

# LoRA adapter settings applied on top of the loaded model.
lora_options = {
    # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    'r': 16,
    'target_modules': [
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    'lora_alpha': 16,
    # Supports any, but = 0 is optimized
    'lora_dropout': 0,
    # Supports any, but = "none" is optimized
    'bias': "none",
    # "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    # True or "unsloth" for very long context
    'use_gradient_checkpointing': "unsloth",
    'random_state': 3407,
    # Rank stabilized LoRA is supported but left off here
    'use_rslora': False,
    # LoftQ is supported but not configured here
    'loftq_config': None,
}
model = FastLanguageModel.get_peft_model(model, **lora_options)

==((====))==  Unsloth: Fast Mistral patching release 2024.5
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.2.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. Xformers = 0.0.25.post1. FA = False.
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth


Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


### Prepare the dataset for fine-tuning

In [51]:
# Template used both for the training examples and for inference queries.
# The first `{}` takes the instruction; the second `{}` takes the response
# (callers pass an empty string there when generating).
prompt = (
    'Below is an instruction that describes a task, paired with an input that '
    'provides further context. Write a response that appropriately completes the request.\n'
    '\n'
    '### Instruction:\n'
    '{}\n'
    '\n'
    '### Response:\n'
    '{}'
)

def format_prompts(data=None, template=None, eos_token=None):
    """Render instruction/output pairs into training texts.

    Args:
        data: DataFrame with 'instruction' and 'output' columns. Defaults to
            the notebook-level `training_data`.
        template: Format string with two `{}` slots (instruction, output).
            Defaults to the notebook-level `prompt`.
        eos_token: Text appended to every example. Defaults to
            `tokenizer.eos_token`.

    Returns:
        A dict `{'text': [...]}` with one formatted string per complete row.
    """
    # Fall back to the notebook globals so a bare `format_prompts()` call
    # behaves as before.
    if data is None:
        data = training_data
    if template is None:
        template = prompt
    if eos_token is None:
        eos_token = tokenizer.eos_token

    # A missing field would otherwise be formatted as the literal text 'nan'
    # and end up in the training set, so incomplete rows are skipped.
    complete_rows = data.dropna(subset=['instruction', 'output'])

    # without the EOS token, generation will go on forever
    texts = [
        template.format(instruction, output) + eos_token
        for instruction, output in zip(complete_rows['instruction'], complete_rows['output'])
    ]
    return { 'text': texts }

from datasets import Dataset

# format_prompts() returns a plain dict, so it is wrapped in a Dataset directly.
dataset = Dataset.from_dict(format_prompts())

# Last expression of the cell: preview the first formatted training example.
dataset[0]['text']

'Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\nWhat is the status of my order?\n\n### Response:\nYour order is currently being processed. Please allow 3-5 business days for shipping.<|endoftext|>'

### Train the model

In [52]:
from trl import SFTTrainer
from transformers import TrainingArguments

# Query bf16 support once; exactly one of fp16 / bf16 is enabled below.
use_bf16 = torch.cuda.is_bf16_supported()

training_args = TrainingArguments(
    # 2 sequences per device x 4 accumulation steps = 8 per optimizer step.
    per_device_train_batch_size = 2,
    gradient_accumulation_steps = 4,
    warmup_steps = 5,
    # max_steps takes precedence over num_train_epochs (see the trainer log).
    max_steps = 60,
    learning_rate = 2e-4,
    fp16 = not use_bf16,
    bf16 = use_bf16,
    logging_steps = 1,
    optim = "adamw_8bit",
    weight_decay = 0.01,
    lr_scheduler_type = "linear",
    seed = 3407,
    output_dir = "outputs",
)

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    # Column of `dataset` that holds the formatted prompt text.
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = training_args,
)

trainer_stats = trainer.train()

# To persist the fine-tuned model and tokenizer, uncomment:
# model.save_pretrained('customer_support_model')
# tokenizer.save_pretrained('customer_support_model')

  self.pid = os.fork()


Map (num_proc=2):   0%|          | 0/56 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs


Step,Training Loss
1,2.2262
2,2.2781
3,2.0239
4,1.6303
5,1.4704
6,1.1129
7,0.9839
8,0.8402
9,0.7082
10,0.7607


### Test the model

In [62]:
# Switch the model to Unsloth's inference mode before generating
# (enables native 2x faster inference).
FastLanguageModel.for_inference(model)
def ask_query(query, max_new_tokens = 64):
    """Generate the fine-tuned model's reply to a single customer query.

    Args:
        query: Question inserted into the instruction slot of `prompt`.
        max_new_tokens: Upper bound on the number of generated tokens
            (default 64, the value that used to be hard-coded).

    Returns:
        The generated reply only: prompt text and special tokens removed,
        surrounding whitespace stripped.
    """
    # The response slot is left empty so the model continues after '### Response:'.
    inputs = tokenizer(
        [prompt.format(query, '')],
        return_tensors = 'pt',
    ).to('cuda')

    # # Streaming outputs
    # text_streamer = TextStreamer(tokenizer)
    # _ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 128)

    outputs = model.generate(**inputs, max_new_tokens = max_new_tokens, use_cache = True)

    # generate() returns the prompt tokens followed by the continuation.
    # Slicing by prompt length isolates the reply; searching the decoded text
    # for 'Response:' picks the wrong spot when the query contains that text.
    prompt_length = inputs['input_ids'].shape[1]
    reply_tokens = outputs[0][prompt_length:]

    # skip_special_tokens drops the EOS marker wherever it appears, instead of
    # matching a hard-coded '<|endoftext|>' at the very end of the string.
    return tokenizer.decode(reply_tokens, skip_special_tokens = True).strip()

# Sample query; the returned reply is displayed as the cell's output.
ask_query('What are the payment options?')

'We accept all major credit cards and PayPal.'

In [63]:
# Second sample query; the returned reply is displayed as the cell's output.
ask_query('May I know the return policy?')

'We accept returns within 30 days of delivery. Please see our full return policy for details.'