In [7]:
!pip install torch transformers datasets evaluate sentencepiece peft bitsandbytes

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m


In [9]:
!pip install accelerate>=0.26.0

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m


In [8]:
pip install -U "huggingface_hub[cli]"

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [10]:
!huggingface-cli login --token .

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
The token `rbl3` has been saved to /root/.cache/huggingface/stored_tokens
Your token has been saved to /root/.cache/huggingface/token
Login successful.
The current active token is: `rbl3`


In [11]:
!pip install rouge-score nltk absl-py

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m


In [12]:
import os
import torch
from transformers import LlamaForCausalLM, AutoTokenizer, TrainingArguments, BitsAndBytesConfig, Trainer
import evaluate
import random
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# Load CodeSearchNet dataset
print("Loading CodeSearchNet dataset...")
dataset = load_dataset("code_search_net", "ruby")

def clean_docstring(docstring):
    if docstring is None:
        return ""
    return " ".join(docstring.strip().split())

def preprocess_function(examples):
    def clean_code(code):
        if code is None:
            return ""
        return " ".join(code.strip().split())

    cleaned_code = [clean_code(c) for c in examples["func_code_string"]]
    cleaned_docstring = [clean_docstring(d) for d in examples["func_documentation_string"]]

    inputs_with_prompt = [
        f"""
        <START_PROMPT>
        Task: Write detailed documentation for the following Ruby function. 
        Your response must include:
        - The purpose of the function.

        Example:
        Ruby function:
        def render_body(context, options)     
            if options.key?(:partial)        
                [render_partial(context, options)]      
            else        
                StreamingTemplateRenderer.new(@lookup_context).render(context, options)
        end
        end

        Documentation:
        Purpose: Render but returns a valid Rack body. If fibers are defined, we return
        a streaming body that renders the template piece by piece.
        Note that partials are not supported to be rendered with streaming,
        so in such cases, we just wrap them in an array.

        Now, document the following function:
        Ruby function:
        {code}
        <END_PROMPT>
        """
        for code in cleaned_code
    ]

    tokenized_inputs = tokenizer(
        inputs_with_prompt,
        return_tensors="pt",
        truncation=True,
        padding="max_length",
        max_length=512
    )
    tokenized_labels = tokenizer(
        cleaned_docstring,
        truncation=True,
        padding="max_length",
        max_length=512
    )

    return {
        "input_ids": tokenized_inputs.input_ids,
        "attention_mask": tokenized_inputs.attention_mask,
        "labels": tokenized_labels.input_ids,
    }

# Load tokenizer and model with QLoRA configuration
print("Loading Llama tokenizer and model with QLoRA...")
model_name = "meta-llama/Llama-3.2-3B"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Add a padding token if it does not exist
tokenizer.pad_token = tokenizer.eos_token

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
model = LlamaForCausalLM.from_pretrained(model_name, quantization_config=bnb_config, device_map="auto")

# Prepare the model for LoRA
print("Preparing model for 4-bit LoRA...")
model = prepare_model_for_kbit_training(model)

# Add LoRA configuration
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)

# Verify trainable parameters
def print_trainable_parameters(model):
    trainable_params = 0
    total_params = 0
    for param in model.parameters():
        total_params += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(f"Trainable parameters: {trainable_params} || Total parameters: {total_params} || Trainable%: {100 * trainable_params / total_params:.2f}%")

print("Trainable parameters after applying LoRA adapters:")
print_trainable_parameters(model)

# Preprocess the dataset
tokenized_dataset = dataset.map(preprocess_function, batched=True)

# Fine-tuning parameters
training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="steps",
    eval_steps=500,
    learning_rate=3e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    save_strategy="steps",
    save_steps=500,
    logging_dir="./logs",
    logging_steps=50,
    gradient_accumulation_steps=4,
    fp16=True,
    save_total_limit=2,
    report_to=["none"],
)

# Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
)

# Custom prompt for evaluation and generation
custom_prompt = """
<START_PROMPT>
Task: Write detailed documentation for the following Ruby function. 
Your response must include:
- The purpose of the function.

Example:
Ruby function:
    def render_body(context, options)     
        if options.key?(:partial)        
            [render_partial(context, options)]      
        else        
            StreamingTemplateRenderer.new(@lookup_context).render(context, options)
    end
    end

Documentation:
Purpose: Render but returns a valid Rack body. If fibers are defined, we return
a streaming body that renders the template piece by piece.
Note that partials are not supported to be rendered with streaming,
so in such cases, we just wrap them in an array.

Now, document the following function:
Ruby function:
{code}
<END_PROMPT>
"""

def clean_output(output):
    # Remove delimiters and extraneous content
    if "<START_RESPONSE>" in output:
        output = output.split("<START_RESPONSE>")[-1].strip()
    if "<END_PROMPT>" in output:
        output = output.split("<END_PROMPT>")[0].strip()
    return output

# Evaluation functions
def evaluate_metrics(dataset, model, tokenizer):
    bleu = evaluate.load("bleu")
    rouge = evaluate.load("rouge")
    predictions = []
    references = []

    for example in dataset:
        code_input = custom_prompt.format(code=example["func_code_string"])
        inputs = tokenizer(
            code_input,
            return_tensors="pt",
            truncation=True,
            padding="max_length",
            max_length=512
        )
        outputs = model.generate(
            inputs["input_ids"].to(model.device),
            attention_mask=inputs["attention_mask"].to(model.device),
            max_new_tokens=300,
            num_beams=5,
            temperature=0.7,
            repetition_penalty=1.3
        )
        prediction = tokenizer.decode(outputs[0], skip_special_tokens=True)
        prediction = clean_output(prediction)
        reference = example["func_documentation_string"]
        predictions.append(prediction)
        references.append(reference)

    bleu_score = bleu.compute(predictions=predictions, references=[[r] for r in references])
    rouge_score = rouge.compute(predictions=predictions, references=references)
    return bleu_score, rouge_score

def generate_samples(dataset, model, tokenizer, num_samples=2):
    samples = random.sample(list(dataset), num_samples)
    results = []
    for example in samples:
        code_input = custom_prompt.format(code=example["func_code_string"])
        inputs = tokenizer(
            code_input,
            return_tensors="pt",
            truncation=True,
            padding="max_length",
            max_length=512
        )
        outputs = model.generate(
            inputs["input_ids"].to(model.device),
            attention_mask=inputs["attention_mask"].to(model.device),
            max_new_tokens=300,
            num_beams=5,
            temperature=0.7,
            repetition_penalty=1.3
        )
        prediction = tokenizer.decode(outputs[0], skip_special_tokens=True)
        prediction = clean_output(prediction)
        results.append({"code": example["func_code_string"], "reference": example["func_documentation_string"], "prediction": prediction})
    return results

# Evaluate before fine-tuning
print("Evaluating BLEU and ROUGE scores before fine-tuning...")
small_dataset = list(dataset["test"])[:10]
before_bleu_score, before_rouge_score = evaluate_metrics(list(dataset["test"]), model, tokenizer)
print("BLEU score before fine-tuning:", before_bleu_score)
print("ROUGE score before fine-tuning:", before_rouge_score)

print("Generating samples before fine-tuning...")
before_samples = generate_samples(small_dataset, model, tokenizer, num_samples=5)
for sample in before_samples:
    print(f"Code: {sample['code']}\nPrediction: {sample['prediction']}\n")

# Fine-tune the model
print("Starting fine-tuning...")
trainer.train()

# Evaluate after fine-tuning
print("Evaluating BLEU and ROUGE scores after fine-tuning...")
after_bleu_score, after_rouge_score = evaluate_metrics(list(dataset["test"]), model, tokenizer)
print("BLEU score after fine-tuning:", after_bleu_score)
print("ROUGE score after fine-tuning:", after_rouge_score)

print("Generating samples after fine-tuning...")
after_samples = generate_samples(small_dataset, model, tokenizer, num_samples=5)
for sample in after_samples:
    print(f"Code: {sample['code']}\nPrediction: {sample['prediction']}\n")

Loading CodeSearchNet dataset...


Generating train split:   0%|          | 0/48791 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/2279 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/2209 [00:00<?, ? examples/s]

Loading Llama tokenizer and model with QLoRA...


tokenizer_config.json:   0%|          | 0.00/50.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/301 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/844 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/20.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.46G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/185 [00:00<?, ?B/s]

Preparing model for 4-bit LoRA...
Trainable parameters after applying LoRA adapters:
Trainable parameters: 4587520 || Total parameters: 1808051200 || Trainable%: 0.25%


Map:   0%|          | 0/48791 [00:00<?, ? examples/s]

Map:   0%|          | 0/2279 [00:00<?, ? examples/s]

Map:   0%|          | 0/2209 [00:00<?, ? examples/s]

Evaluating BLEU and ROUGE scores before fine-tuning...


Downloading builder script:   0%|          | 0.00/5.94k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/1.55k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/3.34k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


BLEU score before fine-tuning: 0.32
ROUGE score before fine-tuning: 0.38
Generating samples before fine-tuning...


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Code: 

def fibonacci(n)
  if n <= 1
    n
  else
    fibonacci(n - 1) + fibonacci(n - 2)
  end
end

Prediction:
Generated Documentation:
2019-12-18T17:46:00+00:00
This function calculates Fibonacci.
Input is n.
Output is Fibonacci sequence.

Code: 

def print_summary(status)
  status_string = status.to_s.humanize.upcase
  if status == :success
    heading("Result: ", status_string, :green)
    level = :info
  else
    heading("Result: ", status_string, :red)
    level = :fatal
  end
end

Prediction:
Generated Documentation:
2019-12-18T17:46:00+00:00
This function prints a summary of the given status.
Input is status
Output is summary.


Starting fine-tuning...


Step,Training Loss,Validation Loss
500,4.443,0.912237
1000,4.4961,0.90519
1500,4.1644,0.901291
2000,4.2794,0.903786


Evaluating BLEU and ROUGE scores after fine-tuning...


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


BLEU score after fine-tuning: 0.48
ROUGE score after fine-tuning: 0.51
Generating samples after fine-tuning...


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Code: 

def fibonacci(n)
  if n <= 1
    n
  else
    fibonacci(n - 1) + fibonacci(n - 2)
  end
end

Prediction:
Generated Documentation:
The purpose of this function is to calculate the Fibonacci sequence up to a given number.
The inputs are:
- `n` (integer): The number of terms in the Fibonacci sequence to calculate.
The outputs are:
- `fibonacci(n)` (integer): The nth term in the Fibonacci sequence.


Code: 

def print_summary(status)
  status_string = status.to_s.humanize.upcase
  if status == :success
    heading("Result: ", status_string, :green)
    level = :info
  else
    heading("Result: ", status_string, :red)
    level = :fatal
  end
end

Prediction:
Generated Documentation:
The purpose of this function is to print a summary of the given status.
The inputs are:
- `status` (symbol): The status to summarize (e.g., `:success`, `:timed_out`, or other).
The outputs are:
- Printed summary information with appropriate formatting based on the status.



