In [70]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from peft import prepare_model_for_kbit_training
from peft import LoraConfig, get_peft_model
import transformers
import os

import warnings

warnings.filterwarnings("ignore")  # This silences all warnings

from huggingface_hub import login
login(token="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")
print("logged in successfully")
os.environ['CUDA_VISIBLE_DEVICES'] ='0'

Token has not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /home/cs_mmoha014/.cache/huggingface/token
Login successful
logged in successfully


In [71]:
MODEL_NAME="databricks/dolly-v2-3b"#"TheBloke/Llama-2-7B-AWQ"#"TheBloke/Mistral-7B-Instruct-v0.2-GPTQ"
#MODEL_NAME="mistralai/Mistral-7B-Instruct-v0.2"
device = torch.device("cuda")
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map='auto', trust_remote_code=False, revision='main')
model = model.to(device)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)

In [72]:

model.eval()
comment="great content, thank you!"
prompt=f'''[INST]{comment}[/INST]'''
inputs = tokenizer(prompt, return_tensors='pt').to(device)

In [73]:
output = model.generate(input_ids=inputs['input_ids'], max_new_tokens=140)
print(tokenizer.batch_decode(output)[0])

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


[INST]great content, thank you![/INST]

[INST]I'm glad you enjoyed it![/INST]

[INST]Please let me know if you have any comments or questions about the video! I'd love to hear what you thought![/INST]

[INST]Thank you! I really appreciate your feedback![/INST]

[INST]You're welcome! I hope you enjoy the rest of the video![/INST]

[INST]I sure will![/INST]

[INST]Thank you! I'm looking forward to it![/INST]

[INST]You're welcome! Have a nice day![/INST]

[INST]You too


# check prompt engineering in together.ai

We can try different prompts to find a good one

### Start fine-tuning

In [74]:
# Start Fine-Tuning
model.train() # model in training mode (dropout modules are activated)
model.gradient_checkpointing_enable() # enable gradient check pointing
model = prepare_model_for_kbit_training(model) # enable quantized training

In [75]:
config = LoraConfig(r=8, lora_alpha=32, target_modules=["dense_h_to_4h", "dense_4h_to_h", "query_key_value", "dense", "embed_out",], lora_dropout=0.05, bias='none', task_type='CAUSAL_LM')
model = get_peft_model(model, config)
# LoRA trainable parameter count
model.print_trainable_parameters()

trainable params: 10,908,480 || all params: 2,785,994,560 || trainable%: 0.3915


### Preprocess text/dataset

In [76]:
#------------- Preprocess Text --------------
from datasets import Dataset, load_dataset
#load dataset
data = load_dataset("shawhin/shawgpt-youtube-comments")
#https://github.com/shwhint/youtube-blog/tree/main/LLMs

In [77]:

def tokenize_function(examples):
   # extract text
   text = examples['example']
   #tokenize and truncate text
   tokenizer.truncation_side='left'
   tokenized_inputs = tokenizer(text, return_tensors='np', truncation=True, max_length=512)
   return tokenized_inputs

#### If you have a lot of data (the loaded dataset has only 60 samples) and each sample in dataset can have different length, we need to add a padding token to the end of shorter samples to make even length for all samples. For this purpose we can use <u><i>data collator </i></u>

In [11]:

# tokenize training and validation datasets
tokenized_data = data.map(tokenize_function, batched=True)

# -----setting pad token -----
tokenizer.pad_token = tokenizer.eos_token

# ----- data collator -----
data_collator = transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False) # mlm=masked language modeling


Map:   0%|          | 0/50 [00:00<?, ? examples/s]

Map:   0%|          | 0/9 [00:00<?, ? examples/s]

##### setting the hyperparameters

In [12]:
#Hyperparameters
lr=2e-4
batch_size=4
num_epochs=10

# define training arguments
training_args = transformers.TrainingArguments(
   output_dir='tuned_model', learning_rate=lr, 
   per_device_train_batch_size=batch_size,
   per_device_eval_batch_size=batch_size,
   num_train_epochs=num_epochs,
   weight_decay=0.01,
   logging_strategy='epoch',
   evaluation_strategy='epoch',
   save_strategy='epoch',
   load_best_model_at_end=True,
   gradient_accumulation_steps=4,
   warmup_steps=2,
   fp16=True,
   optim='paged_adamw_8bit') # optim is ingredients 3

#### Run Training/Fine-tuning

In [78]:
# ---------------------- Run training/Fine-tuning ----------------
trainer=transformers.Trainer(
   model=model,
   train_dataset=tokenized_data['train'],
   eval_dataset = tokenized_data['test'],
   args = training_args,
   data_collator=data_collator)
# train model
model.config.use_cache=False # slience the warnings
trainer.train()

# renable warnings
model.config.use_cache = True

Epoch,Training Loss,Validation Loss
0,4.2494,2.946505
1,2.7527,1.971922
2,1.6646,1.449531
4,1.0018,1.356448
4,1.232,1.331331
5,1.0966,1.32801
6,1.0485,1.333788
8,0.7151,1.34648
8,0.9298,1.355771
9,0.5911,1.357275


## After Fine-tuning

##### Preprocess the output text to have a good format 
using regular expression to remove unncessary symbols

In [79]:
import re

def remove_ins_text(pattern, text):
  """
  Removes all occurrences of '[INST]'' or '[\INST]' text from a string.

  Args:
      text: The string to process.

  Returns:
      The string with ['\INS'] text removed.
  """
  return re.sub(pattern, "", text)


##### 1st generated response by the fined-tuned model

In [80]:
model.eval()
# ShawGPT, functioning as a virtual data science consultant on YouTube, communicates in clear, accessible language, escalating to technical depth upon request. \
# It reacts to feedback aptly and ends responses with its signature 'â€“ShawGPT'. \
# ShawGPT will tailor the length of its responses to match the viewer's comment, providing concise acknowledgments to brief expressions of gratitude or feedback, \
# thus keeping the interaction natural and engaging
intstructions_string = f""".

Please respond to the following comment.
"""
prompt_template = lambda comment: f'''[INST] {intstructions_string} \n{comment} \n[/INST]'''

comment = "Great content, thank you!"

prompt = f'''[INST] {intstructions_string} \n{comment} \n[/INST]'''#prompt_template(comment)

inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(input_ids=inputs["input_ids"].to("cuda"), max_new_tokens=280)

# print(tokenizer.batch_decode(outputs)[0][:150])

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


In [81]:
text =  tokenizer.batch_decode(outputs)[0]
pattern = "(\[/INST(\])*)|(\[INST\])|(\</[pP]\>)|(\\n)"
cleaned_text = remove_ins_text(pattern,text)
print(cleaned_text[1:])

Please respond to the following comment. Great content, thank you! Glad to hear! 


##### 2nd generated response by the fined-tuned model

In [82]:
comment = "What is fat tailedness?"

prompt = f'''[INST] {intstructions_string} \n{comment} \n[/INST]'''#prompt_template(comment)

inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(input_ids=inputs["input_ids"].to("cuda"), max_new_tokens=280)
# print(tokenizer.batch_decode(outputs)[0])

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


In [83]:
text =  tokenizer.batch_decode(outputs)[0]
pattern = "(\[/INST\])|(\[INST\])"
cleaned_text = remove_ins_text(pattern,text)
print(cleaned_text[1:])



Please respond to the following comment.
 
What is fat tailedness? 

Fat tailedness is a term used to describe the shape of the probability density function (PDF) of the tail of a distribution. 


It is a property of the distribution that the probability of the data being further away from the mean than a certain value tends to zero as the value gets larger. 


For example, the normal distribution has a fat tailed distribution, as the normal distribution's PDF has a long tail, which means the probability of the data being further away from the mean than a certain value is not small. 


The term was coined in the field of statistics in the 1970s, and was used to describe the shape of the PDF of the data from the exponential distribution. 


The exponential distribution is a common choice for modeling the length of time it takes to observe a phenomenon, such as the duration of a computer program or the length of time it takes to observe a natural phenomenon. 


The exponential distribu