In [None]:
#Reference -- https://www.mlexpert.io/machine-learning/tutorials/alpaca-fine-tuning
#Reference -- https://github.com/tloen/alpaca-lora/blob/main/finetune.py

### Note -- Running this notebook on a GPU rather than a CPU is recommended

In [None]:
#Change the current path of the execution
import sys
import os
# Resolve the parent directory only once per kernel session: re-running this
# cell must not climb another level up or append duplicate sys.path entries.
if "cwd" not in globals():
    cwd = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
    sys.path.append(cwd)
# Always (re)enter the resolved project root; a no-op when already there.
os.chdir(cwd)

In [None]:
# Upgrade pip before installing the notebook's dependencies.
%pip install -U pip
# Dependencies pinned to exact releases.
%pip install accelerate==0.18.0
%pip install appdirs==1.4.4
%pip install bitsandbytes==0.37.2 ##
%pip install datasets==2.10.1
%pip install fire==0.5.0
# NOTE(review): peft and transformers are installed straight from their git
# repositories with no tag or commit specified, so the versions obtained can
# differ between runs -- consider pinning a commit for reproducibility.
%pip install git+https://github.com/huggingface/peft.git
%pip install git+https://github.com/huggingface/transformers.git
%pip install torch==2.0.1
%pip install sentencepiece==0.1.97
%pip install tensorboardX==2.6
%pip install gradio==3.23.0
# NOTE(review): gdown is also unpinned.
%pip install gdown

In [None]:
# Enable the autoreload extension in mode 2 so edits to imported modules are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import transformers
import textwrap
from transformers import LlamaTokenizer, LlamaForCausalLM
import os
import sys
from typing import List
 
from peft import (
    LoraConfig,
    get_peft_model,
    get_peft_model_state_dict,
    #prepare_model_for_int8_training, --> DEPRECATED
    prepare_model_for_kbit_training
)
 
import fire
import torch
from datasets import load_dataset
import pandas as pd
 
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns
from pylab import rcParams
 
%matplotlib inline
sns.set(rc={'figure.figsize':(10, 7)})
sns.set(rc={'figure.dpi':100})
sns.set(style='white', palette='muted', font_scale=1.2)
 
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DEVICE

In [None]:
# Download the file with the given id into the datasets/ directory via gdown
# (presumably the bitcoin-sentiment-tweets.csv read in the next cell -- confirm).
!gdown 1xQ89cpZCnafsW5T3G3ZQWvR7q682t2BN -O datasets/

In [None]:
# Load the raw tweet/sentiment table and preview its first rows.
df = pd.read_csv(filepath_or_buffer="datasets/bitcoin-sentiment-tweets.csv")
df.head()

In [None]:
def sentiment_score_to_name(score: float) -> str:
    """Map a numeric sentiment score to its class label.

    Args:
        score: Sentiment score; only its sign is used.

    Returns:
        "Negative" for scores below zero, "Positive" for scores above zero,
        and "Neutral" for everything else (including exactly zero).
    """
    if score < 0:
        return "Negative"
    return "Positive" if score > 0 else "Neutral"
 
# Build one Alpaca-style record per row: a fixed instruction, the tweet text
# as the input, and the sentiment label derived from the score as the output.
dataset_data = [
    {
        "instruction": "Detect the sentiment of the tweet.",
        "input": tweet,
        "output": sentiment_score_to_name(score),
    }
    for tweet, score in zip(df["tweet"], df["sentiment"])
]

In [None]:
# Sanity check: print the first converted instruction record.
print(dataset_data[0])

In [None]:
import json

# Persist the instruction records as a single JSON array so they can be
# loaded back from disk later.
with open("datasets/alpaca-bitcoin-sentiment-dataset.json", "w") as f:
    f.write(json.dumps(dataset_data))

In [None]:
# Checkpoint to fine-tune (an earlier revision of this notebook used
# "decapoda-research/llama-7b-hf").
BASE_MODEL = "baffo32/decapoda-research-llama-7B-hf"

# Load the base model with 8-bit quantization (load_in_8bit=True) to reduce
# memory usage; device placement is left to device_map="auto".
model = LlamaForCausalLM.from_pretrained(
    BASE_MODEL,
    device_map="auto",
    torch_dtype=torch.float16,
    load_in_8bit=True,
)

In [None]:
# Load the tokenizer that belongs to BASE_MODEL.
tokenizer = LlamaTokenizer.from_pretrained(BASE_MODEL)

# Pad on the left, using token id 0 (unk) so that padding stays distinct from
# the EOS token.
tokenizer.padding_side = "left"
tokenizer.pad_token_id = 0

In [None]:
# Read the JSON file written earlier back in through the HuggingFace
# `datasets` library and display its "train" split.
data = load_dataset(
    path="json",
    data_files="datasets/alpaca-bitcoin-sentiment-dataset.json",
)
data["train"]

In [None]:
#Next, we need to create prompts from the loaded dataset and tokenize them
def generate_prompt(data_point):
    """Render one instruction record as the full training prompt.

    Args:
        data_point: Mapping with the keys "instruction", "input" and "output".

    Returns:
        The prompt text: a fixed preamble followed by the "### Instruction:",
        "### Input:" and "### Response:" sections, each on its own line and
        each followed by the corresponding value from ``data_point``.
    """
    # The preamble is assembled from short literals so that no lint directive
    # has to live inside the string: a "# noqa" marker placed in the prompt
    # literal would become part of every training example.
    preamble = (
        "Below is an instruction that describes a task, paired with an input "
        "that provides further context. Write a response that appropriately "
        "completes the request."
    )
    return (
        f"{preamble}\n"
        f"### Instruction:\n{data_point['instruction']}\n"
        f"### Input:\n{data_point['input']}\n"
        f"### Response:\n{data_point['output']}"
    )
 
def tokenize(prompt, add_eos_token=True, cutoff_len=256):
    """Tokenize a prompt for causal-LM training.

    Uses the notebook-level ``tokenizer``.

    Args:
        prompt: Text to tokenize.
        add_eos_token: When true, append the EOS token id if the sequence does
            not already end with it and there is still room below
            ``cutoff_len``.
        cutoff_len: Maximum number of tokens; longer prompts are truncated.
            Defaults to 256, the value previously hard-coded here.

    Returns:
        The tokenizer output with "input_ids" and "attention_mask" (both
        possibly extended by the EOS position) plus "labels", a copy of
        "input_ids".
    """
    result = tokenizer(
        prompt,
        truncation=True,
        max_length=cutoff_len,
        padding=False,
        return_tensors=None,
    )
    input_ids = result["input_ids"]

    # An empty id list has no last element to compare, so treat it as
    # "does not end with EOS" instead of indexing into it.
    ends_with_eos = bool(input_ids) and input_ids[-1] == tokenizer.eos_token_id
    if add_eos_token and not ends_with_eos and len(input_ids) < cutoff_len:
        input_ids.append(tokenizer.eos_token_id)
        result["attention_mask"].append(1)

    # Labels are a separate list so later edits to one do not alias the other.
    result["labels"] = input_ids.copy()

    return result
 
def generate_and_tokenize_prompt(data_point):
    """Build the prompt for one record and return its tokenized form.

    Args:
        data_point: Record passed straight to ``generate_prompt``.

    Returns:
        Whatever ``tokenize`` returns for the generated prompt.
    """
    return tokenize(generate_prompt(data_point))


# Hold out 200 examples for validation (shuffled with a fixed seed), then map
# every record of both splits through generate_and_tokenize_prompt() to obtain
# the tokenized prompts.
train_val = data["train"].train_test_split(test_size=200, shuffle=True, seed=42)
train_data = train_val["train"].map(generate_and_tokenize_prompt)
val_data = train_val["test"].map(generate_and_tokenize_prompt)

### Training

In [None]:
# --- LoRA adapter settings ---
# Attention dimension (rank) of the LoRA matrices.
LORA_R = 8
# Alpha parameter for LoRA scaling.
LORA_ALPHA = 16
# Dropout probability for the LoRA layers.
LORA_DROPOUT = 0.05
# Names of the modules the adapter is applied to.
LORA_TARGET_MODULES = ["q_proj", "v_proj"]

# --- Optimisation settings ---
# Effective batch size, reached by accumulating gradients over micro-batches.
BATCH_SIZE = 128
MICRO_BATCH_SIZE = 4
GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE
LEARNING_RATE = 3e-4
# Number of optimiser steps (300 was used previously).
TRAIN_STEPS = 100
# Directory that receives checkpoints and the final saved model.
OUTPUT_DIR = "experiments"

# Prepare the quantized base model for training
# (prepare_model_for_kbit_training replaces the deprecated
# prepare_model_for_int8_training).
model = prepare_model_for_kbit_training(model)

# Describe the LoRA adapter using the constants defined above.
config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=LORA_TARGET_MODULES,
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT,
    bias="none",
)

# Wrap the base model with the adapter.
model = get_peft_model(model, config)
# Uncomment to print the trainable-parameter summary:
#model.print_trainable_parameters()

import inspect

# TrainingArguments collects the settings and hyperparameters for the run.
# https://discuss.huggingface.co/t/training-llama-with-lora-on-multiple-gpus-may-exist-bug/47005/4

# transformers is installed unpinned from git, and newer releases name the
# evaluation-schedule argument `eval_strategy` while older ones only accept
# `evaluation_strategy`. Use whichever name the installed version accepts.
_eval_strategy_kwarg = (
    "eval_strategy"
    if "eval_strategy" in inspect.signature(transformers.TrainingArguments).parameters
    else "evaluation_strategy"
)

training_arguments = transformers.TrainingArguments(
    per_device_train_batch_size=MICRO_BATCH_SIZE,
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
    warmup_steps=50,  # 100 was used previously
    max_steps=TRAIN_STEPS,
    learning_rate=LEARNING_RATE,
    fp16=True,
    logging_steps=10,
    optim="adamw_torch",
    save_strategy="steps",
    eval_steps=50,
    save_steps=50,
    output_dir=OUTPUT_DIR,
    save_total_limit=3,
    load_best_model_at_end=True,
    report_to="tensorboard",
    ddp_find_unused_parameters=False,  # important: training does not work otherwise
    # Evaluate on the same step schedule as saving.
    **{_eval_strategy_kwarg: "steps"},
)

# DataCollatorForSeq2Seq (from the Transformers library) assembles the
# tokenized examples into batches. Per the library documentation:
# + padding=True pads each batch to its longest sequence.
# + pad_to_multiple_of=8 rounds the padded length up to a multiple of 8.
# + return_tensors="pt" returns PyTorch tensors.
data_collator = transformers.DataCollatorForSeq2Seq(
    tokenizer,
    padding=True,
    pad_to_multiple_of=8,
    return_tensors="pt",
)

In [None]:
# Bring together the LoRA-wrapped model, both tokenized splits, the training
# arguments and the batch collator.
trainer = transformers.Trainer(
    model=model,
    args=training_arguments,
    data_collator=data_collator,
    train_dataset=train_data,
    eval_dataset=val_data,
)

# Turn off the model's generation cache for the training run.
model.config.use_cache = False

# NOTE: the upstream alpaca-lora script replaces model.state_dict with a
# wrapper around get_peft_model_state_dict(). That override is deliberately
# not applied here: it strips the adapter name from the parameter keys, and
# current PEFT's own save path then filters by adapter name again, finds
# nothing, and writes an empty adapter (both for checkpoints and for the
# final save_pretrained() below). PEFT already saves only the adapter weights.

# torch.compile(model) is intentionally not used; it led to
# "SafetensorError: Error while deserializing header: InvalidHeaderDeserialization"
# https://github.com/huggingface/transformers/issues/27397

trainer.train()
# Save the trained model to OUTPUT_DIR.
model.save_pretrained(OUTPUT_DIR)

In [None]:
# Run the trainer's model over the validation split as a confirmation check;
# the result is kept in `predictions` for inspection.
predictions = trainer.predict(val_data)

### Predictions

In [None]:
from transformers import LlamaTokenizer, LlamaForCausalLM

# Reload the model from the directory written by model.save_pretrained().
# NOTE(review): for a PEFT-wrapped model that directory presumably holds only
# the adapter files -- confirm the installed transformers version can resolve
# an adapter-only directory here.
trained_model = LlamaForCausalLM.from_pretrained(
    pretrained_model_name_or_path=OUTPUT_DIR
)

In [None]:
# Try predictions on the validation set with the reloaded model.
# LlamaForCausalLM has no predict() method, so the model is wrapped in a
# Trainer (reusing the existing arguments and collator) and evaluated through
# Trainer.predict(), as was done with the in-memory model.
eval_trainer = transformers.Trainer(
    model=trained_model,
    args=training_arguments,
    data_collator=data_collator,
)
predictions = eval_trainer.predict(val_data)