## Finetune LLAMA-2 7B model using QLoRA for code summarization

### Setup
##### Install dependencies

In [1]:
!pip install -q -U trl transformers accelerate git+https://github.com/huggingface/peft.git
!pip install -q datasets bitsandbytes einops
!pip install huggingface_hub
!pip install pytorch-ignite

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m88.1/88.1 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m66.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.2/244.2 kB[0m [31m29.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m519.3/519.3 kB[0m [31m53.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m268.8/268.8 kB[0m [31m30.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m100.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m81.5 MB/s[0m eta [36m

##### Import libraries

In [2]:
from datasets import load_dataset
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, AutoTokenizer
from peft import LoraConfig, get_peft_model
from transformers import TrainingArguments
from trl import SFTTrainer

import re
from tqdm import tqdm
from ignite.metrics import RougeL
from ignite.metrics.nlp import Bleu

from huggingface_hub import login
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

##### Load dataset from HuggingFace - Private Datatset in my account

In [3]:
dataset_name = 'Tejus1/codeSearchNet_prompt_texts'
data_files = {"train": "train.csv", "test": "test.csv"}
dataset = load_dataset(dataset_name, data_files=data_files)

Downloading data files:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/401M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/25.2M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/2 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

#### Load Model and perform 4-bit quantization

In [4]:
model_name = "meta-llama/Llama-2-7b-hf"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    trust_remote_code=True
)
model.config.use_cache = False

Downloading (…)lve/main/config.json:   0%|          | 0.00/609 [00:00<?, ?B/s]

Downloading (…)fetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

Downloading (…)of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)neration_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

Load Tokenizer

In [5]:
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

Downloading (…)okenizer_config.json:   0%|          | 0.00/776 [00:00<?, ?B/s]

Downloading tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

LoRA Hyperparameters

In [None]:
lora_alpha = 16
lora_dropout = 0.1
lora_r = 64

peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_r,
    bias="none",
    task_type="CAUSAL_LM"
)

Model Hyperparameters

In [None]:
output_dir = "./results"
per_device_train_batch_size = 46
gradient_accumulation_steps = 8
optim = "paged_adamw_32bit"
save_steps = 10
logging_steps = 2
evaluation_strategy = "steps"
eval_steps = 200
eval_accumulation_steps = 5
learning_rate = 2e-4
max_grad_norm = 0.3
max_steps = 800
warmup_ratio = 0.03
lr_scheduler_type = "constant"


training_arguments = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    do_eval=True,
    evaluation_strategy = evaluation_strategy,
    eval_steps = eval_steps,
    eval_accumulation_steps = eval_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    fp16=True,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=True,
    lr_scheduler_type=lr_scheduler_type,
)

Configure Trainer

In [None]:
max_seq_length = 512

trainer = SFTTrainer(
    model=model,
    train_dataset=dataset['train'],
    eval_dataset=dataset['test'],
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=training_arguments,
)



Map:   0%|          | 0/236742 [00:00<?, ? examples/s]

Map:   0%|          | 0/14036 [00:00<?, ? examples/s]

In [None]:
for name, module in trainer.model.named_modules():
    if "norm" in name:
        module = module.to(torch.float32)

### Train

In [None]:
# trainer.train()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss,Validation Loss
200,0.9074,1.093143
400,0.8941,1.088113


Step,Training Loss,Validation Loss
200,0.9074,1.093143
400,0.8941,1.088113
600,0.9108,1.084171
800,1.0549,1.070854


TrainOutput(global_step=800, training_loss=1.061251674592495, metrics={'train_runtime': 30623.9001, 'train_samples_per_second': 9.613, 'train_steps_per_second': 0.026, 'total_flos': 1.4748782789935104e+18, 'train_loss': 1.061251674592495, 'epoch': 1.24})

### Save Model

In [None]:
model_to_save = trainer.model.module if hasattr(trainer.model, 'module') else trainer.model  # Take care of distributed/parallel training
model_to_save.save_pretrained("outputs")

### Inference

In [6]:
lora_config = LoraConfig.from_pretrained('Tejus1/llama2-qlora-finetunined-code_summarization')
model = get_peft_model(model, lora_config)

Downloading (…)/adapter_config.json:   0%|          | 0.00/446 [00:00<?, ?B/s]

In [8]:
text = """Below is the code of a Python function. Please write a short comment describing what the function does. \n\n### Instruction:\ndef cbs_download(url, output_dir=\'.\', merge=True, info_only=False, **kwargs):\n    \n\n    html = get_content(url)\n    pid = match1(html, r\'video\\.settings\\.pid\\s*=\\s*\\\'([^\\\']+)\\\'\')\n    title = match1(html, r\'video\\.settings\\.title\\s*=\\s*\\"([^\\"]+)\\"\')\n\n    theplatform_download_by_pid(pid, title, output_dir=output_dir, merge=merge, info_only=info_only)\n### Response:\n"""
device = "cuda:0"

inputs = tokenizer(text, return_tensors="pt").to(device)
outputs = model.generate(**inputs, max_new_tokens=128)
generated_summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(generated_summary)

Below is the code of a Python function. Please write a short comment describing what the function does. 

### Instruction:
def cbs_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    

    html = get_content(url)
    pid = match1(html, r'video\.settings\.pid\s*=\s*\'([^\']+)\'')
    title = match1(html, r'video\.settings\.title\s*=\s*\"([^\"]+)\"')

    theplatform_download_by_pid(pid, title, output_dir=output_dir, merge=merge, info_only=info_only)
### Response:
cbs_download('http://www.cbs.com/shows/survivor/video/', output_dir='.', merge=True, info_only=False)

### Explanation:
This function is used to download videos from CBS website. It is the main function of this script.

The function downloads the video from the given URL and saves it in the given output_dir. If merge is set to True, then the downloaded video will be merged with the previous downloaded video. Otherwise, the downloaded video will be saved as a new file.

The function


### Evaluation

In [18]:
def extract_prompt(text):
    match = re.search(r'^(.+)### Response:\n', text, re.DOTALL)
    return match.group(0).strip() if match else ' '

def extract_response(text):
    match = re.search(r'### Response:\n(.+)', text, re.DOTALL)
    return match.group(1).strip() if match else ' '

In [19]:
# below is the code to calculate the BLEU and rouge score for the model using the test dataset
def calculate_bleu_rougel(model: torch.nn.Module, data):
    targets = []
    generated = []
    for text, target_str in data:
        prompt = extract_prompt(text)

        inputs = tokenizer(prompt, return_tensors="pt").to(device)
        outputs = model.generate(**inputs, max_new_tokens=128)

        generated_summary = tokenizer.decode(outputs[0], skip_special_tokens=True)

        response = extract_response(generated_summary)
        correct_summary_tokens = target_str.split()
        targets.append(correct_summary_tokens)
        generated.append(response.split())

    rouge_metric = RougeL(multiref="best")
    bleu = Bleu(ngram=1, smooth="smooth1")

    bleu.update(([generated_summary], [correct_summary_tokens]))
    rouge_metric.update((generated, targets))

    print("BLEU-1: ", bleu.compute())
    print("RougeL: ", rouge_metric.compute())

    return bleu.compute(), rouge_metric.compute()

In [20]:
calculate_bleu_rougel(model, list(zip(dataset['test']['text'], dataset['test']['docstring'])))

BLEU-1:  tensor(0.0360, dtype=torch.float64)
RougeL:  {'Rouge-L-P': 0.007583560153709015, 'Rouge-L-R': 0.17072581891643007, 'Rouge-L-F': 0.17072581891643007}


(tensor(0.0360, dtype=torch.float64),
 {'Rouge-L-P': 0.007583560153709015,
  'Rouge-L-R': 0.17072581891643007,
  'Rouge-L-F': 0.17072581891643007})

Pusing the model weights to HuggingFace

In [None]:
model.push_to_hub("llama2-qlora-finetunined-code_summarization")

adapter_model.bin:   0%|          | 0.00/134M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/Tejus1/llama2-qlora-finetunined-code_summarization/commit/6e0f6bf55bf2dabf7a86d05a114f9fda938d425f', commit_message='Upload model', commit_description='', oid='6e0f6bf55bf2dabf7a86d05a114f9fda938d425f', pr_url=None, pr_revision=None, pr_num=None)