# Install Dependencies

In [1]:
# !git clone https://github.com/tloen/alpaca-lora.git

In [1]:
%cd alpaca-lora/

/home/ec2-user/SageMaker/ISURU/llm-experiments/alpaca-lora


In [2]:
%ls

alpaca_data_cleaned.json  export_state_dict_checkpoint.py  pyproject.toml
alpaca_data.json          finetune.py                      README.md
DATA_LICENSE              generate.py                      requirements.txt
docker-compose.yml        lengths.ipynb                    [0m[01;34mtemplates[0m/
Dockerfile                LICENSE                          [01;34mutils[0m/
export_hf_checkpoint.py   [01;34mlora-alpaca[0m/


In [3]:
# !pip install -q datasets loralib sentencepiece
# !pip uninstall transformers
# !pip install -q git+https://github.com/zphang/transformers@c3dc391
# !pip install -q git+https://github.com/huggingface/peft.git
# !pip install bitsandbytes

# Load Alpaca Dataset

In [4]:
import torch, os
import pandas as pd
import transformers
import torch.nn as nn
import bitsandbytes as bnb
from datasets import load_dataset
from peft import prepare_model_for_int8_training, LoraConfig, get_peft_model
from transformers import AutoTokenizer, AutoConfig, LLaMAForCausalLM, LLaMATokenizer

tokenizer = LLaMATokenizer.from_pretrained(
                                        "decapoda-research/llama-7b-hf", 
                                        add_eos_token=True
                                        )
tokenizer.pad_token = tokenizer.eos_token
tokenizer.pad_token_id = tokenizer.eos_token_id

data = load_dataset("json", data_files="alpaca_data.json", split='train')
data = data.select(range(1000))
data


Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
CUDA SETUP: CUDA runtime path found: /home/ec2-user/anaconda3/envs/pytorch_p39/lib/libcudart.so
CUDA SETUP: Highest compute capability among GPUs detected: 7.0
CUDA SETUP: Detected CUDA version 117
CUDA SETUP: Loading binary /home/ec2-user/anaconda3/envs/pytorch_p39/lib/python3.9/site-packages/bitsandbytes/libbitsandbytes_cuda117_nocublaslt.so...


  warn(msg)
Found cached dataset json (/home/ec2-user/.cache/huggingface/datasets/json/default-18a0c6737b59d4e9/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)


Dataset({
    features: ['instruction', 'input', 'output'],
    num_rows: 1000
})

In [5]:
instructions = data["instruction"]
inputs = data["input"]
outputs = data["output"]

alpaca_df = pd.DataFrame({"instruction": instructions, "input": inputs, "output": outputs})
alpaca_df = alpaca_df.sample(frac=1).reset_index(drop=True)
alpaca_df.head()

Unnamed: 0,instruction,input,output
0,Generate a list of present continuous verbs.,,"Running, eating, sleeping, calling, texting, w..."
1,"Based on the following passage, determine the ...",The focus on speed in production today is ofte...,The author holds a negative attitude towards t...
2,Rewrite the sentence using gender-neutral lang...,A salesman is giving a presentation.,A salesperson is giving a presentation.
3,"Create a grammar for the phrase ""a milkshake"".",,"The grammar for ""a milkshake"" is: \nArticle (a..."
4,Formulate a marketing plan for a new mobile app.,,The marketing plan for the new mobile app shou...


# FineTune-Alpaca-7B

### Here what we do is a thing called **low-rank adaptation (LoRA) finetuning**. Instead of Finetuning the whole transformer model, we only finetune certain parts of the model 

### Hparams

In [6]:
EPOCHS = 1
BATCH_SIZE = 8
CUTOFF_LEN = 256
LEARNING_RATE = 2e-5
MICRO_BATCH_SIZE = 2
GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE

LORA_R = 4
LORA_ALPHA = 16
LORA_DROPOUT = 0.05

### Load Model

In [7]:
model = LLaMAForCausalLM.from_pretrained(
                                        "decapoda-research/llama-7b-hf",
                                        load_in_8bit=True, # load in 8-bit, so that we can quantize it and run in smaller GPUs 
                                        device_map="auto",
                                        )
tokenizer = LLaMATokenizer.from_pretrained(
                                        "decapoda-research/llama-7b-hf", add_eos_token=True
                                        )

model = prepare_model_for_int8_training(model)



Loading checkpoint shards:   0%|          | 0/33 [00:00<?, ?it/s]

In [9]:
config = LoraConfig(
                  r=LORA_R,
                  lora_alpha=LORA_ALPHA,
                  target_modules=["q_proj", "v_proj"],
                  lora_dropout=LORA_DROPOUT,
                  bias="none",
                  task_type="CAUSAL_LM",
              )
model = get_peft_model(model, config)
tokenizer.pad_token_id = 0  # unk. we want this to be different from the eos token
data = load_dataset("json", data_files="alpaca_data.json", split='train')
data = data.select(range(1000))
data

Found cached dataset json (/home/ec2-user/.cache/huggingface/datasets/json/default-18a0c6737b59d4e9/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)


Dataset({
    features: ['instruction', 'input', 'output'],
    num_rows: 1000
})

In [10]:
def generate_prompt(data_point):
    # sorry about the formatting disaster gotta move fast
    if data_point["input"]:
        return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
{data_point["instruction"]}
### Input:
{data_point["input"]}
### Response:
{data_point["output"]}"""
    else:
        return f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
{data_point["instruction"]}
### Response:
{data_point["output"]}"""

# reprocess dataset using LAMBDA function
data = data.shuffle().map(
                      lambda data_point: tokenizer(
                                                generate_prompt(data_point),
                                                truncation=True,
                                                max_length=CUTOFF_LEN,
                                                padding="max_length",
                                                )
                        )

trainer = transformers.Trainer(
                            model=model,
                            train_dataset=data,
                            args=transformers.TrainingArguments(
                                                            per_device_train_batch_size=MICRO_BATCH_SIZE,
                                                            gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
                                                            warmup_steps=100,
                                                            num_train_epochs=EPOCHS,
                                                            learning_rate=LEARNING_RATE,
                                                            fp16=True,
                                                            logging_steps=1,
                                                            output_dir="lora-alpaca",
                                                            save_total_limit=3,
                            ),
                            data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
                            )
model.config.use_cache = False
trainer.train(resume_from_checkpoint=False)
model.save_pretrained("lora-alpaca-finetuned")

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]



Step,Training Loss
1,2.0844
2,2.2312
3,2.359
4,2.2245
5,2.211
6,2.1668
7,2.1397
8,2.2711
9,2.0308
10,2.0827


# Push to HuggingFace

In [13]:
from huggingface_hub import notebook_login
from huggingface_hub.hf_api import HfFolder

notebook_login()
HfFolder.save_token('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')

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [17]:
model.push_to_hub("zuu/alpaca7B-lora-finetuning", use_auth_token=True)

CommitInfo(commit_url='https://huggingface.co/zuu/alpaca7B-lora-finetuning/commit/f05f3b7dd31ead1faaf3a0ff463b5265b13d6526', commit_message='Upload model', commit_description='', oid='f05f3b7dd31ead1faaf3a0ff463b5265b13d6526', pr_url=None, pr_revision=None, pr_num=None)

# Inference

In [1]:
from peft import PeftModel
from transformers import LLaMATokenizer, LLaMAForCausalLM, GenerationConfig

tokenizer = LLaMATokenizer.from_pretrained("decapoda-research/llama-7b-hf")
model = LLaMAForCausalLM.from_pretrained(
                                        "decapoda-research/llama-7b-hf",
                                        load_in_8bit=True,
                                        device_map="auto",
                                        )
model = PeftModel.from_pretrained(model, "zuu/alpaca7B-lora-finetuning")


Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
CUDA SETUP: CUDA runtime path found: /home/ec2-user/anaconda3/envs/pytorch_p39/lib/libcudart.so
CUDA SETUP: Highest compute capability among GPUs detected: 7.0
CUDA SETUP: Detected CUDA version 117
CUDA SETUP: Loading binary /home/ec2-user/anaconda3/envs/pytorch_p39/lib/python3.9/site-packages/bitsandbytes/libbitsandbytes_cuda117_nocublaslt.so...


  warn(msg)


Loading checkpoint shards:   0%|          | 0/33 [00:00<?, ?it/s]

In [2]:
PROMPT = """Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
Tell me something about alpacas.
### Response:"""

inputs = tokenizer(
                PROMPT,
                return_tensors="pt",
                )
input_ids = inputs["input_ids"].cuda()

generation_config = GenerationConfig(
                                    temperature=0.6,
                                    top_p=0.95,
                                    repetition_penalty=1.15,
                                )

print("Generating...")
generation_output = model.generate(
                                input_ids=input_ids,
                                generation_config=generation_config,
                                return_dict_in_generate=True,
                                output_scores=True,
                                max_new_tokens=128,
                            )
for s in generation_output.sequences:
    print(tokenizer.decode(s))

Generating...
 Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
Tell me something about alpacas.
### Response:
Alpaca are animals that live in South America and have long, soft furry coats. They can be any color from white to black or brown. Alpacas are very friendly and gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle gentle
