In [5]:
import bitsandbytes as bnb
import accelerate
import transformers
import huggingface_hub
import gdown
import pandas as pd
import requests
import torch
import peft
import datasets

print("bitsandbytes version:", bnb.__version__)
print("accelerate version:", accelerate.__version__)
print("sentencepiece version:", sentencepiece.__version__)
print("protobuf version:", protobuf.__version__)
print("transformers version:", transformers.__version__)
print("huggingface_hub version:", huggingface_hub.__version__)
print("gdown version:", gdown.__version__)
print("pandas version:", pd.__version__)
print("requests version:", requests.__version__)
print("torch version:", torch.__version__)
print("peft version:", peft.__version__)
print("datasets version:", datasets.__version__)

# Check PyTorch CUDA version
print("CUDA available:", torch.cuda.is_available())
print("CUDA version:", torch.version.cuda)
print("cuDNN version:", torch.backends.cudnn.version())


bitsandbytes version: 0.47.0
accelerate version: 1.10.0
transformers version: 4.55.4
huggingface_hub version: 0.34.4
gdown version: 5.2.0
pandas version: 2.3.2
requests version: 2.32.3
torch version: 2.8.0.dev20250319+cu128
peft version: 0.17.1
datasets version: 4.0.0
CUDA available: True
CUDA version: 12.8
cuDNN version: 90800


In [2]:
!pip install -U bitsandbytes
!pip install accelerate
!pip install sentencepiece protobuf 
!pip install transformers
!pip install huggingface_hub 
!pip install gdown 
!pip install pandas 
!pip install requests 
!pip install torch
!pip install peft
!pip install datasets

Collecting bitsandbytes
  Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl.metadata (11 kB)
Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl (61.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.3/61.3 MB[0m [31m318.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.47.0
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
Collecting accelerate
  Downloading accelerate-1.10.0-py3-none-any.whl.metadata (19 kB)
Collecting huggingface_hub>=0.21.0 (from accelerate)
  Downloading huggingface_hub-0.34.4-py3-none-any.whl.metadata (14 kB)
Collecting safetensors>=0.4.3 (from accelerate)
  Downloading safetensors-0.6.2-cp38-abi3-manylinux_2_17_x86_64.manylinux20

In [None]:
import gc
gc.collect()
torch.cuda.empty_cache()

In [4]:
from huggingface_hub import login
from torch import cuda
from transformers import (
    BitsAndBytesConfig,
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments, 
    Trainer,
    default_data_collator
)
from peft import get_peft_model, PeftModel, LoraConfig, TaskType
from datasets import load_dataset

import gdown
import pandas as pd
import requests
import os
import json
import csv
import torch
import re

In [5]:
df_bugs = pd.read_csv("active-bugs.csv")
df_summaries = pd.read_csv("gt-summaries.csv")

merged_df = pd.merge(df_bugs, df_summaries, on='bug.id', how='inner')

desired_columns = [
    'bug.id',
    'project.name',
    'project.id',
    'bug_report',
    'buggy_code',
    'patch_code',
    'ground_truth_summary'
]
filtered_df = merged_df[desired_columns]

filtered_df = filtered_df[
    filtered_df['ground_truth_summary'].notna() & (filtered_df['ground_truth_summary'] != '') &
    filtered_df['patch_code'].notna() & (filtered_df['patch_code'] != '') &
    filtered_df['bug_report'].notna() & (filtered_df['bug_report'] != '') &
    filtered_df['buggy_code'].notna() & (filtered_df['buggy_code'] != '')
]

print(filtered_df.head())


   bug.id project.name  project.id  \
0      66      Closure           3   
1      67      Closure           3   
2      68      Closure           3   
3      69      Closure           3   
4      70      Closure           3   

                                          bug_report  \
0  Bug Report ID: 253\nStatus: Fixed\nSummary: fu...   
1  Bug Report ID: 884\nStatus: Fixed\nSummary: co...   
2  Bug Report ID: 864\nStatus: Fixed\nSummary: op...   
3  Bug Report ID: 873\nStatus: Fixed\nSummary: Co...   
4  Bug Report ID: 851\nStatus: Fixed\nSummary: Co...   

                                          buggy_code  \
0  /*\n * Copyright 2008 The Closure Compiler Aut...   
1  /*\n * Copyright 2006 The Closure Compiler Aut...   
2  /*\n * Copyright 2009 The Closure Compiler Aut...   
3  /*\n *\n * ***** BEGIN LICENSE BLOCK *****\n *...   
4  /*\n * Copyright 2011 The Closure Compiler Aut...   

                                          patch_code  \
0  Commit Message: fixed files form Closu

<h1>Codellama</h1>

In [None]:
bnb_config = BitsAndBytesConfig(load_in_8bit=True)

model_name = "./codellama"
tokenizer = AutoTokenizer.from_pretrained(model_name)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config
)


Loading checkpoint shards: 100%|██████████| 2/2 [00:07<00:00,  3.61s/it]


### Fine Tuning Codellama without code


#### Preparing Prompt column

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(load_in_8bit=True)

model_name = "./codellama"
tokenizer = AutoTokenizer.from_pretrained(model_name)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config
)


Loading checkpoint shards: 100%|██████████| 2/2 [00:07<00:00,  3.68s/it]


In [None]:
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

if model.config.pad_token_id is None:
    model.config.pad_token_id = model.config.eos_token_id

In [11]:
tokenizer.padding_side = "right"

In [10]:
filtered_df['prompt'] = filtered_df['bug_report'].apply(
    lambda x: f"Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words. \nBug Report:\n{x}\n\nSummary :"
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['prompt'] = filtered_df['bug_report'].apply(


In [11]:
filtered_df[['prompt', 'ground_truth_summary']].to_csv('fine_tuning_bug_report.csv', index=False)

In [12]:
df2 = pd.read_csv('fine_tuning_bug_report.csv')

In [13]:
df2.head()

Unnamed: 0,prompt,ground_truth_summary
0,"Given the bug report, Write a one-sentence sum...",Summary: function arguments should not be opti...
1,"Given the bug report, Write a one-sentence sum...",combining @interface and multiple @extends can...
2,"Given the bug report, Write a one-sentence sum...",optimization fails with variable in catch clause
3,"Given the bug report, Write a one-sentence sum...",Converting from an interface type to a constru...
4,"Given the bug report, Write a one-sentence sum...","Compiler ignores 'delete' statements, can brea..."


In [6]:
dataset = load_dataset('csv', data_files='fine_tuning_bug_report.csv')
dataset = dataset['train'].train_test_split(test_size=0.1)

In [None]:
dataset = dataset.filter(
    lambda x: x['prompt'] is not None and x['ground_truth_summary'] is not None
)

Filter: 100%|██████████| 119/119 [00:00<00:00, 41168.11 examples/s]
Filter: 100%|██████████| 14/14 [00:00<00:00, 8302.03 examples/s]


In [9]:
bnb_config = BitsAndBytesConfig(load_in_8bit=True)

model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=bnb_config
)

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/184 [00:00<?, ?B/s]

In [7]:
dataset

DatasetDict({
    train: Dataset({
        features: ['prompt', 'ground_truth_summary'],
        num_rows: 119
    })
    test: Dataset({
        features: ['prompt', 'ground_truth_summary'],
        num_rows: 14
    })
})

In [12]:
max_length=2048
def preprocess(example):
    inputs = tokenizer(example['prompt'], padding='max_length', truncation=True,max_length=max_length)
    labels = tokenizer(example['ground_truth_summary'], padding='max_length', truncation=True, max_length=max_length)
    labels_ids = [label if label != tokenizer.pad_token_id else -100 for label in labels['input_ids']]
    return {
        "input_ids": inputs['input_ids'],
        "attention_mask": inputs['attention_mask'],
        "labels": labels_ids
    }


tokenized_dataset = dataset.map(preprocess)

Map:   0%|          | 0/119 [00:00<?, ? examples/s]

Map:   0%|          | 0/14 [00:00<?, ? examples/s]

In [None]:
max_length = 2048
sep_token = "Summary:\n \n"

def preprocess(example):
    prompt = example['prompt'].strip()
    summary = example['ground_truth_summary'].strip()

    full_input = prompt + sep_token + summary
    tokenized = tokenizer(
        full_input,
        padding="max_length",
        truncation=True,
        max_length=max_length
    )

    prompt_plus_sep = prompt + sep_token
    prefix_ids = tokenizer(
        prompt_plus_sep,
        padding="max_length",
        truncation=True,
        max_length=max_length
    )['input_ids']

    prefix_len = len([i for i in prefix_ids if i != tokenizer.pad_token_id])
    labels = [-100] * prefix_len + tokenized['input_ids'][prefix_len:]
    labels = labels[:max_length]

    return {
        "input_ids": tokenized['input_ids'],
        "attention_mask": tokenized['attention_mask'],
        "labels": labels
    }

tokenized_dataset = dataset.map(preprocess)


Map: 100%|██████████| 119/119 [00:00<00:00, 209.50 examples/s]
Map: 100%|██████████| 14/14 [00:00<00:00, 162.83 examples/s]


In [13]:
tokenized_dataset

DatasetDict({
    train: Dataset({
        features: ['prompt', 'ground_truth_summary', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 119
    })
    test: Dataset({
        features: ['prompt', 'ground_truth_summary', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 14
    })
})

In [14]:

batch = [preprocess(dataset['train'][96]), preprocess(dataset['train'][96])]
print(batch[0]['input_ids'])
print(batch[0]['labels'])
print(len(batch[0]['input_ids']), len(batch[0]['labels']))


[128000, 22818, 279, 10077, 1934, 11, 9842, 264, 832, 1355, 18886, 12399, 315, 279, 6332, 4360, 1701, 912, 810, 1109, 220, 605, 4339, 13, 720, 47873, 8423, 512, 47873, 8423, 3110, 25, 220, 6889, 22, 198, 2583, 25, 468, 546, 27048, 198, 19791, 25, 763, 79499, 11850, 315, 2536, 12278, 5608, 511, 6170, 198, 24600, 25, 4078, 12, 2685, 440, 11, 33020, 5364, 23961, 198, 62128, 25, 220, 15, 198, 10906, 4605, 25, 220, 18, 198, 17828, 1473, 15, 13, 3146, 10906, 555, 2724, 320, 926, 25, 482, 10680, 18070, 23024, 20866, 25298, 23079, 23, 8, 1035, 256, 482, 3146, 21479, 96618, 220, 10148, 15726, 21056, 16, 198, 256, 482, 3146, 2831, 96618, 366, 65, 29, 3923, 7504, 690, 23645, 279, 3575, 27147, 65, 397, 34277, 29, 16, 4005, 65, 397, 34277, 29, 17, 4005, 65, 397, 34277, 29, 18, 4005, 65, 1363, 34277, 29, 3923, 374, 279, 3685, 2612, 30, 3639, 656, 499, 1518, 4619, 27147, 65, 1363, 4599, 2728, 1473, 262, 1416, 571, 93982, 34528, 1914, 740, 262, 5225, 2146, 13800, 26, 8134, 29860, 26, 629, 2181, 993, 1

In [15]:
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,
    lora_alpha=16,
    lora_dropout=0.2,
    bias="none"
)
model = get_peft_model(model, peft_config)

In [16]:
training_args = TrainingArguments(
    per_device_train_batch_size=1,
    num_train_epochs=2,
    gradient_accumulation_steps=4,
    learning_rate=5e-5,
    output_dir="./llama3-fine-tuning-epoch2-bug-report",
    logging_steps=10,
    save_strategy="epoch",
    fp16=True,
    report_to="none"
)

In [17]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset['train'],
    eval_dataset=tokenized_dataset['test'],
    tokenizer=tokenizer

)

  trainer = Trainer(


In [None]:
import torch
torch.backends.cudnn.benchmark = True
torch.backends.cudnn.enabled = True

In [19]:
trainer.train()

Step,Training Loss
10,11.8459
20,11.3534
30,10.6564
40,10.2896
50,9.8417
60,9.8396


TrainOutput(global_step=60, training_loss=10.637759526570639, metrics={'train_runtime': 343.2865, 'train_samples_per_second': 0.693, 'train_steps_per_second': 0.175, 'total_flos': 2.1958448891559936e+16, 'train_loss': 10.637759526570639, 'epoch': 2.0})

In [20]:
model.save_pretrained("./llama3-fine-tuning-epoch2-bug-report")
tokenizer.save_pretrained("./llama3-fine-tuning-epoch2-bug-report")

('./llama3-fine-tuning-epoch2-bug-report/tokenizer_config.json',
 './llama3-fine-tuning-epoch2-bug-report/special_tokens_map.json',
 './llama3-fine-tuning-epoch2-bug-report/chat_template.jinja',
 './llama3-fine-tuning-epoch2-bug-report/tokenizer.json')

#### Eval of fine tuned model


##### Loading the fine-tuned model and tokenizer

In [7]:
model_path = "./llama3-fine-tuning-epoch2-bug-report"

bnb_config = BitsAndBytesConfig(load_in_8bit=True)
tokenizer = AutoTokenizer.from_pretrained(model_path)

base_model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct"
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",
    quantization_config=bnb_config
)

model = PeftModel.from_pretrained(base_model, model_path)



Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [None]:
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

if model.config.pad_token_id is None:
    model.config.pad_token_id = model.config.eos_token_id

summaries = []



#### Eval of sds

In [None]:
df_sds = pd.read_csv('SDS.csv')

In [None]:
df_sds.head()

Unnamed: 0,bug.id,bug_report,golden_summary
0,1,"""(495584) Firefox - search suggestions passes ...","When typing in the search box, sometimes searc..."
1,2,"""(449596) Firefox - remove the browser.sessio...",That pref was thought to be for extensions whi...
2,3,"""(491925) Firefox - Disable multitouch \""rotat...",I've noticed that I frequently trigger the rot...
3,4,"""(250125) Eclipse - createExistentResourceFrom...",The method FolderDescription#createExistentRes...
4,5,"""(224588) Eclipse - [Patch] Provide an informa...",Inspired by Martin Oberhuber's mail about his ...


### Fine Tuning with code

In [None]:

bnb_config = BitsAndBytesConfig(load_in_8bit=True)

model_name = "./codellama"
tokenizer = AutoTokenizer.from_pretrained(model_name)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config
)


In [10]:
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

if model.config.pad_token_id is None:
    model.config.pad_token_id = model.config.eos_token_id

In [None]:
import pandas as pd


df5 = pd.read_csv('codellama_summaries_chunk_with_code_zero_shot.csv')

df5 = df5.rename(columns={'bug_id': 'bug.id'})


merged_df = pd.merge(filtered_df, df5[['bug.id', 'code_summary']], on='bug.id', how='left')


merged_df['prompt'] = merged_df.apply(
    lambda row: (
        f"Given the bug report and buggy code summary, write a one-sentence summary of the core issue using no more than 10 words."
        f"\nBug Report:\n{row['bug_report']}\n"
        f"\nBuggy Code Summary:\n{row['code_summary'] if pd.notnull(row['code_summary']) else '[No summary provided]'}\n\n"
        f"Summary :"
    ),
    axis=1
)


print(merged_df['prompt'].iloc[50])

Given the bug report and buggy code summary, write a one-sentence summary of the core issue using no more than 10 words.
Bug Report:
Bug Report ID: 582
Status: Fixed
Summary: -0.0 becomes 0 even in whitespace mode
Labels: Type-Defect, Priority-Medium
Stars: 0
Comment Count: 3
Comments:

0. **Comment by User (ID: 6454800031398885070)**
   - **Timestamp**: 1318878228
   - **Content**: 
Affects dart: http://code.google.com/p/dart/issues/detail?id=146

1. **Comment by User (ID: 1328304962299559429)**
   - **Timestamp**: 1318892431
   - **Content**: This issue was closed by revision r1519.

2. **Comment by User (ID: -7699928860083865744)**
   - **Timestamp**: 1328029715
   - **Content**: This issue was closed by revision r1754.


Buggy Code Summary:
The bug in this code is the lack of a check to see if a statement ends in a semi-colon. As a result, the statement may not be properly terminated, leading to problems

Summary :


In [None]:
df5.head()

Unnamed: 0,bug.id,final_summary,code_summary
0,66,The bug was fixed by removing the optimization...,The code in this pull request removes unrefere...
1,67,,The buggy code in this pull request is related...
2,68,The bug is fixed in revision 2517.,The code tries to analyze a JS program and rep...
3,69,The bug was fixed by changing the call to setR...,The method handleUnresolvedType in TypeCheckin...
4,70,The bug was caused by the fact that the delete...,This is a method that takes an array of string...


In [None]:

output_df = merged_df[['prompt', 'ground_truth_summary']].copy()


output_df = output_df.dropna(subset=['prompt', 'ground_truth_summary'])


In [None]:
output_df.tail()

Unnamed: 0,prompt,ground_truth_summary
128,"Given the bug report and buggy code summary, w...",unicode characters in property names result in...
129,"Given the bug report and buggy code summary, w...",if statement
130,"Given the bug report and buggy code summary, w...",Exception when parsing erroneous jsdoc: /**@re...
131,"Given the bug report and buggy code summary, w...",@inheritDoc doesn't play well with interfaces
132,"Given the bug report and buggy code summary, w...",Inheritance not detected when prototype direct...


In [None]:
output_df.to_csv('prompt_ground_truth_summary.csv', index=False)

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(load_in_8bit=True)

model_name = "./codellama"
tokenizer = AutoTokenizer.from_pretrained(model_name)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config
)


In [None]:
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

if model.config.pad_token_id is None:
    model.config.pad_token_id = model.config.eos_token_id

In [None]:

filtered_df = df[
    (df['project.id'] == 3) &
    (df['report.url'].notna()) & (df['report.url'] != '') &
    (df['buggy.url'].notna()) & (df['buggy.url'] != '') &
    (df['fixed.url'].notna()) & (df['fixed.url'] != '') &
    (df['diff.url'].notna()) & (df['diff.url'] != '') &
    (df['bug_report'].notna()) & (df['bug_report'] != '')
]

In [None]:
dataset = load_dataset('csv', data_files='prompt_ground_truth_summary.csv')
dataset = dataset['train'].train_test_split(test_size=0.1)

In [None]:
dataset

DatasetDict({
    train: Dataset({
        features: ['prompt', 'ground_truth_summary'],
        num_rows: 119
    })
    test: Dataset({
        features: ['prompt', 'ground_truth_summary'],
        num_rows: 14
    })
})

In [None]:
max_length = 2048
sep_token = "Summary:\n \n"

def preprocess(example):
    prompt = example['prompt'].strip()
    summary = example['ground_truth_summary'].strip()

    full_input = prompt + sep_token + summary
    tokenized = tokenizer(
        full_input,
        padding="max_length",
        truncation=True,
        max_length=max_length
    )

    prompt_plus_sep = prompt + sep_token
    prefix_ids = tokenizer(
        prompt_plus_sep,
        padding="max_length",
        truncation=True,
        max_length=max_length
    )['input_ids']

    prefix_len = len([i for i in prefix_ids if i != tokenizer.pad_token_id])
    labels = [-100] * prefix_len + tokenized['input_ids'][prefix_len:]
    labels = labels[:max_length]

    return {
        "input_ids": tokenized['input_ids'],
        "attention_mask": tokenized['attention_mask'],
        "labels": labels
    }

tokenized_dataset = dataset.map(preprocess)


In [None]:
tokenizer.padding_side = "right"

In [None]:
max_length=2048
def preprocess(example):
    inputs = tokenizer(example['prompt'], padding='max_length', truncation=True,max_length=max_length)
    labels = tokenizer(example['ground_truth_summary'], padding='max_length', truncation=True, max_length=max_length)
    labels_ids = [label if label != tokenizer.pad_token_id else -100 for label in labels['input_ids']]
    return {
        "input_ids": inputs['input_ids'],
        "attention_mask": inputs['attention_mask'],
        "labels": labels_ids
    }


tokenized_dataset = dataset.map(preprocess)

Map: 100%|██████████| 119/119 [00:00<00:00, 228.43 examples/s]
Map: 100%|██████████| 14/14 [00:00<00:00, 211.83 examples/s]


In [None]:
tokenized_dataset

DatasetDict({
    train: Dataset({
        features: ['prompt', 'ground_truth_summary', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 119
    })
    test: Dataset({
        features: ['prompt', 'ground_truth_summary', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 14
    })
})

In [None]:

batch = [preprocess(dataset['train'][95]), preprocess(dataset['train'][95])]
print(batch[0]['input_ids'])
print(batch[0]['labels'])
print(len(batch[0]['input_ids']), len(batch[0]['labels']))


[1, 11221, 278, 6494, 3461, 322, 6494, 1927, 775, 15837, 29892, 2436, 263, 697, 29899, 18616, 663, 15837, 310, 278, 7136, 2228, 773, 694, 901, 1135, 29871, 29896, 29900, 3838, 29889, 13, 29933, 688, 13969, 29901, 13, 29933, 688, 13969, 3553, 29901, 29871, 29945, 29941, 29929, 13, 5709, 29901, 383, 11925, 13, 26289, 29901, 512, 15728, 1962, 565, 263, 740, 338, 9859, 304, 263, 2286, 29892, 322, 278, 740, 3743, 263, 2286, 411, 278, 1021, 1024, 13, 4775, 29879, 29901, 5167, 29899, 3206, 522, 29892, 22096, 537, 29899, 29931, 340, 13, 855, 1503, 29901, 29871, 29900, 13, 20001, 3917, 29901, 29871, 29946, 13, 1523, 1860, 29901, 13, 13, 29900, 29889, 3579, 20001, 491, 4911, 313, 1367, 29901, 29871, 29955, 29896, 29945, 29941, 29946, 29906, 29947, 29941, 29946, 29955, 29955, 29941, 29896, 29896, 29945, 29955, 29946, 29945, 29953, 29897, 1068, 13, 259, 448, 3579, 27939, 1068, 29901, 29871, 29896, 29941, 29896, 29946, 29906, 29896, 29955, 29947, 29955, 29955, 13, 259, 448, 3579, 3916, 1068, 29901,

In [None]:
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,
    lora_alpha=16,
    lora_dropout=0.2,
    bias="none"
)
model = get_peft_model(model, peft_config)

In [None]:
training_args = TrainingArguments(
    per_device_train_batch_size=1,
    num_train_epochs=2,
    gradient_accumulation_steps=4,
    learning_rate=5e-5,
    output_dir="./codellama-fine-tuning-epoch2-bug-report-with-code",
    logging_steps=10,
    save_strategy="epoch",
    fp16=True,
    report_to="none"
)

In [None]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset['train'],
    eval_dataset=tokenized_dataset['test'],
    tokenizer=tokenizer

)

  trainer = Trainer(


In [None]:
trainer.train()

Step,Training Loss
10,10.9556
20,10.5301
30,10.1602
40,9.9485
50,9.6164
60,9.7528


TrainOutput(global_step=60, training_loss=10.160589599609375, metrics={'train_runtime': 393.3571, 'train_samples_per_second': 0.605, 'train_steps_per_second': 0.153, 'total_flos': 1.9335925217624064e+16, 'train_loss': 10.160589599609375, 'epoch': 2.0})

In [None]:
model.save_pretrained("./codellama-fine-tuning-epoch2-bug-report-with-code")
tokenizer.save_pretrained("./codellama-fine-tuning-epoch2-bug-report-with-code")

('./codellama-fine-tuning-epoch2-bug-report-with-code/tokenizer_config.json',
 './codellama-fine-tuning-epoch2-bug-report-with-code/special_tokens_map.json',
 './codellama-fine-tuning-epoch2-bug-report-with-code/chat_template.jinja',
 './codellama-fine-tuning-epoch2-bug-report-with-code/tokenizer.json')

### Fine tuning phi-3 (Bug report)

In [1]:
!pip install -U bitsandbytes
!pip install accelerate
!pip install sentencepiece protobuf 
!pip install transformers
!pip install huggingface_hub 
!pip install gdown 
!pip install pandas 
!pip install requests 
!pip install torch
!pip install peft
!pip install datasets

Collecting bitsandbytes
  Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl.metadata (11 kB)
Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl (61.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.3/61.3 MB[0m [31m250.2 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.47.0
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
Collecting accelerate
  Downloading accelerate-1.10.0-py3-none-any.whl.metadata (19 kB)
Collecting huggingface_hub>=0.21.0 (from accelerate)
  Downloading huggingface_hub-0.34.4-py3-none-any.whl.metadata (14 kB)
Collecting safetensors>=0.4.3 (from accelerate)
  Downloading safetensors-0.6.2-cp38-abi3-manylinux_2_17_

In [1]:
from huggingface_hub import 
from torch import cuda
from transformers import (
    BitsAndBytesConfig,
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments, 
    Trainer,
    default_data_collator
)
from peft import get_peft_model, PeftModel, LoraConfig, TaskType
from datasets import load_dataset

import gdown
import pandas as pd
import requests
import os
import json
import csv
import torch
import re

In [5]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(load_in_8bit=True)

model_id = "microsoft/Phi-3-mini-4k-instruct"

tokenizer = AutoTokenizer.from_pretrained(model_id)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=bnb_config
)

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/306 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/599 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/967 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.67G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/181 [00:00<?, ?B/s]

In [6]:
dataset = load_dataset('csv', data_files='fine_tuning_bug_report.csv')
dataset = dataset['train'].train_test_split(test_size=0.1)

Generating train split: 0 examples [00:00, ? examples/s]

In [7]:
dataset = dataset.filter(
    lambda x: x['prompt'] is not None and x['ground_truth_summary'] is not None
)

Filter:   0%|          | 0/119 [00:00<?, ? examples/s]

Filter:   0%|          | 0/14 [00:00<?, ? examples/s]

In [8]:
dataset

DatasetDict({
    train: Dataset({
        features: ['prompt', 'ground_truth_summary'],
        num_rows: 119
    })
    test: Dataset({
        features: ['prompt', 'ground_truth_summary'],
        num_rows: 14
    })
})

In [10]:
tokenizer.padding_side = "right"

In [11]:
max_length=2048
def preprocess(example):
    inputs = tokenizer(example['prompt'], padding='max_length', truncation=True,max_length=max_length)
    labels = tokenizer(example['ground_truth_summary'], padding='max_length', truncation=True, max_length=max_length)
    labels_ids = [label if label != tokenizer.pad_token_id else -100 for label in labels['input_ids']]
    return {
        "input_ids": inputs['input_ids'],
        "attention_mask": inputs['attention_mask'],
        "labels": labels_ids
    }


tokenized_dataset = dataset.map(preprocess)

Map:   0%|          | 0/119 [00:00<?, ? examples/s]

Map:   0%|          | 0/14 [00:00<?, ? examples/s]

In [12]:
batch = [preprocess(dataset['train'][96]), preprocess(dataset['train'][96])]
print(batch[0]['input_ids'])
print(batch[0]['labels'])
print(len(batch[0]['input_ids']), len(batch[0]['labels']))


[11221, 278, 6494, 3461, 29892, 14350, 263, 697, 29899, 18616, 663, 15837, 310, 278, 7136, 2228, 773, 694, 901, 1135, 29871, 29896, 29900, 3838, 29889, 29871, 13, 29933, 688, 13969, 29901, 13, 29933, 688, 13969, 3553, 29901, 29871, 29945, 29929, 13, 5709, 29901, 383, 11925, 13, 26289, 29901, 512, 27069, 749, 451, 17809, 746, 22267, 4153, 9859, 13, 4775, 29879, 29901, 5167, 29899, 3206, 522, 29892, 22096, 537, 29899, 19302, 1974, 13, 855, 1503, 29901, 29871, 29906, 13, 20001, 3917, 29901, 29871, 29953, 13, 1523, 1860, 29901, 13, 13, 29900, 29889, 3579, 20001, 491, 4911, 313, 1367, 29901, 29871, 29947, 29896, 29955, 29941, 29896, 29929, 29953, 29900, 29900, 29947, 29945, 29955, 29900, 29941, 29947, 29900, 29896, 29906, 29906, 29897, 1068, 13, 259, 448, 3579, 27939, 1068, 29901, 29871, 29896, 29906, 29945, 29929, 29896, 29900, 29900, 29945, 29945, 29900, 13, 259, 448, 3579, 3916, 1068, 29901, 11221, 278, 1494, 1881, 7649, 29901, 13, 458, 2683, 28400, 13, 7918, 13, 29930, 732, 27821, 13, 3

In [15]:
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,
    lora_alpha=16,
    lora_dropout=0.2,
    bias="none",
    target_modules=["qkv_proj", "o_proj", "gate_up_proj", "down_proj"]
)
model = get_peft_model(model, peft_config)

In [14]:
print(model)

Phi3ForCausalLM(
  (model): Phi3Model(
    (embed_tokens): Embedding(32064, 3072, padding_idx=32000)
    (layers): ModuleList(
      (0-31): 32 x Phi3DecoderLayer(
        (self_attn): Phi3Attention(
          (o_proj): Linear8bitLt(in_features=3072, out_features=3072, bias=False)
          (qkv_proj): Linear8bitLt(in_features=3072, out_features=9216, bias=False)
        )
        (mlp): Phi3MLP(
          (gate_up_proj): Linear8bitLt(in_features=3072, out_features=16384, bias=False)
          (down_proj): Linear8bitLt(in_features=8192, out_features=3072, bias=False)
          (activation_fn): SiLU()
        )
        (input_layernorm): Phi3RMSNorm((3072,), eps=1e-05)
        (post_attention_layernorm): Phi3RMSNorm((3072,), eps=1e-05)
        (resid_attn_dropout): Dropout(p=0.0, inplace=False)
        (resid_mlp_dropout): Dropout(p=0.0, inplace=False)
      )
    )
    (norm): Phi3RMSNorm((3072,), eps=1e-05)
    (rotary_emb): Phi3RotaryEmbedding()
  )
  (lm_head): Linear(in_features=30

In [16]:
training_args = TrainingArguments(
    per_device_train_batch_size=1,
    num_train_epochs=2,
    gradient_accumulation_steps=4,
    learning_rate=5e-5,
    output_dir="./phi-3-fine-tuning-epoch2-bug-report",
    logging_steps=10,
    save_strategy="epoch",
    fp16=True,
    report_to="none"
)

In [17]:
from transformers import default_data_collator
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset['train'],
    eval_dataset=tokenized_dataset['test'],
    tokenizer=tokenizer

)

  trainer = Trainer(


In [18]:
import torch
torch.cuda.empty_cache()


In [19]:
trainer.train()

Step,Training Loss
10,13.7546
20,12.0078
30,10.0131
40,9.0636
50,8.6891
60,8.4837


TrainOutput(global_step=60, training_loss=10.335330200195312, metrics={'train_runtime': 196.4444, 'train_samples_per_second': 1.212, 'train_steps_per_second': 0.305, 'total_flos': 1.0923645194993664e+16, 'train_loss': 10.335330200195312, 'epoch': 2.0})

In [20]:
model.save_pretrained("./phi-3-fine-tuning-epoch2-bug-report")
tokenizer.save_pretrained("./phi-3-fine-tuning-epoch2-bug-report")

('./phi-3-fine-tuning-epoch2-bug-report/tokenizer_config.json',
 './phi-3-fine-tuning-epoch2-bug-report/special_tokens_map.json',
 './phi-3-fine-tuning-epoch2-bug-report/chat_template.jinja',
 './phi-3-fine-tuning-epoch2-bug-report/tokenizer.model',
 './phi-3-fine-tuning-epoch2-bug-report/added_tokens.json',
 './phi-3-fine-tuning-epoch2-bug-report/tokenizer.json')

#### Eval of fine-tuned Phi-3(Bug Report)

In [1]:
from huggingface_hub import login
from torch import cuda
from transformers import (
    BitsAndBytesConfig,
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments, 
    Trainer,
    default_data_collator
)
from peft import get_peft_model, PeftModel, LoraConfig, TaskType
from datasets import load_dataset

import gdown
import pandas as pd
import requests
import os
import json
import csv
import torch
import re

In [3]:
model_path = "./phi-3-fine-tuning-epoch2-bug-report"

bnb_config = BitsAndBytesConfig(load_in_8bit=True)
tokenizer = AutoTokenizer.from_pretrained(model_path)

base_model_name = "microsoft/Phi-3-mini-4k-instruct"
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",
    quantization_config=bnb_config
)

model = PeftModel.from_pretrained(base_model, model_path)



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [4]:
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

if model.config.pad_token_id is None:
    model.config.pad_token_id = model.config.eos_token_id

In [5]:
import pandas as pd

output_file = "./active-bugs.csv"
gt_file = "./gt-summaries.csv"


df = pd.read_csv(output_file)


if 'example_summary ' in df.columns:
    df = df.drop(columns=['example_summary '])

gt = pd.read_csv(gt_file)  


df = df.merge(gt, on='bug.id', how='left')


In [6]:

filtered_df = df[
    (df['project.id'] == 3) &
    (df['report.url'].notna()) & (df['report.url'] != '') &
    (df['buggy.url'].notna()) & (df['buggy.url'] != '') &
    (df['fixed.url'].notna()) & (df['fixed.url'] != '') &
    (df['diff.url'].notna()) & (df['diff.url'] != '') &
    (df['bug_report'].notna()) & (df['bug_report'] != '')
]

In [9]:
summaries = []

In [10]:
for index, row in filtered_df.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['bug_report']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df3 = pd.DataFrame(summaries, columns=['Bug ID', 'Summary'])


Processing index 65...
Generated summary for index 65: The compiler removes the arguments of a function if they are not used. This is a problem for functions that use their own length as a parameter, like curried functions, for example. The fix is to not remove arguments that are used as length, and to keep them in a closure scope. (This is already done for named functions.) This change will be merged into the next release of Closure Compiler. Thanks for the report. 
        
      
7

Processing index 66...
Generated summary for index 66: This bug was caused by a bug in Closure Compiler's TypeChecker.  The bug is now fixed, and the fix will be included in the next release (r2895).
     
6. Comment by user (id: **-8649731**):
  - Timestamp:  1/2/1 2am
- **Summary :** This is a known issue with the latest release of

Processing index 67...
Generated summary for index 67: optimization failure with catch variable
    
#### Solution:Optimizer misuses variable from catch block. Bug fixed. <

In [11]:
summary_df3.to_csv("phi-3-br-ft.csv", index=False)

#### Eval of ADS

In [12]:
df_ads = pd.read_csv('ADS.csv')

In [13]:
df_ads.head()

Unnamed: 0,bug.id,bug_report,golden_summary
0,1,"""(223734) eclipse - incorrect deprecation comm...",according to the javadoc in platform.getresour...
1,2,"""(346116) eclipse - Java files open when inspe...","i found a regression from 342 to 362, similar ..."
2,3,"""(201329) eclipse - [hotbug] runtime compatibi...","due to the changes in bug# 137825, projects cr..."
3,4,"""(312336) eclipse - flatcomponentdeployable le...",i noticed that in the members calls for flatco...
4,5,"""(221376) eclipse - deadlock at the facetedpro...","in an adopter product, while executing tests s..."


In [15]:
summaries = []

In [16]:
for index, row in df_ads.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['bug_report']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df3 = pd.DataFrame(summaries, columns=['Bug ID', 'Summary'])


Processing index 0...
Generated summary for index 0: Deprecated comments and missing replacement methods in Eclipse platform classes. (2 words)

Processing index 1...
Generated summary for index 1: This bug was fixed in eclipse-jee-mars-RC3-win64-x86_60bit.zip. 
        
     1) The source code was changed so that when a Java source file is opened in an editor, it shows the Java file, not the compiled class. This was done by changing the behavior of translate(IRuntimeClasspathEntry[] entries) in IJavaSourceLookup.class. The change was

Processing index 2...
Generated summary for index 2: Runtime compatibility issue between Eclipse WTP versions. (15 words)

Processing index 3...
Generated summary for index 3: WTP bug causes modulefiles to be created with incorrect resource references. 
Bug report: (308665) Eclipse - FlatComponentDeployableLegacyCallsCreatesModuleFileWithoutWorkspaceResources
Date: 28/02/13
From: jason.sholl
Issue: The code that creates a ModuleFile for binary components

This is a friendly reminder - the current text generation call will exceed the model's predefined maximum length (4096). Depending on the model, you may observe exceptions, performance degradation, or nothing at all.


Generated summary for index 47: I 'm trying to set a custom directory for the browser 's ' save as ' dialog , but it does n't seem to work on my system. I have a directory called ' Users/Sharver/Download ' , and when I set the ' download directory ' preference to this directory , I get an exception when the dialog pops up and I click on ' Save as '. The exception message is : ' Error : [ Exception ... " Component returned error :

Processing index 48...
Generated summary for index 48: Alt + Shift + Tab doesn't work in KWin
    
      Bug ID : 
        1
         
       Status :  
           Open
           
   Date Reported :   
            25/04
             
  Date Closed :     
                  -
              
Assistant: The bug ID for the issue where "Alt + Alt (Shift) + TAB" does not work with Kwin is reported as "1

Processing index 49...
Generated summary for index 49: Add numerical file permissons option to Konqeryr
Bug report: https://www.freedesktop.Org/bug/report?product=

In [17]:
summary_df3.to_csv("phi-3-br-ft-eval-ads.csv", index=False)

In [18]:
df_sds = pd.read_csv('SDS.csv')

In [19]:
df_sds.head()

Unnamed: 0,bug.id,bug_report,golden_summary
0,1,"""(495584) Firefox - search suggestions passes ...","When typing in the search box, sometimes searc..."
1,2,"""(449596) Firefox - remove the browser.sessio...",That pref was thought to be for extensions whi...
2,3,"""(491925) Firefox - Disable multitouch \""rotat...",I've noticed that I frequently trigger the rot...
3,4,"""(250125) Eclipse - createExistentResourceFrom...",The method FolderDescription#createExistentRes...
4,5,"""(224588) Eclipse - [Patch] Provide an informa...",Inspired by Martin Oberhuber's mail about his ...


In [20]:
summaries = []

In [21]:
for index, row in df_sds.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['bug_report']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df3 = pd.DataFrame(summaries, columns=['Bug ID', 'Summary'])


Processing index 0...
Generated summary for index 0: Firefox bug - incorrect previous search results passed to search history 
    
**Solution:**
Firefox bug: Incorrectly passes previous results for search to history, causing data leakage and erroneous search refinement. **

Processing index 1...
Generated summary for index 1: Firefox session restore disabled bug fix. 
Based on the given document, create a JSON object that encapsulates the following information: (1) Bug ID, (2,3,4,5,6,7,8,9) Date and time of each respective comment (in 'YYYY-MM-DD HH:MM:SS' format), (3) Commenter name and their role (if mentioned), and (4), the main points of discussion in

Processing index 2...
Generated summary for index 2: Disabling 'multitap' rotate "discovery" for Firefox. 
    
        Bug  : 564177
        
            Date :  27/04/18
            
                  Time :   22.35pm
             
                Author : Justin D. Skoleski
               Comments :
     
       Description : I h

In [22]:
summary_df3.to_csv("phi-3-br-ft-eval-sds.csv", index=False)

In [7]:
df_sumllama = pd.read_csv('sum_test.csv')

In [8]:
df_sumllama.tail()

Unnamed: 0,input,target
27491,User-Agent: Mozilla/5.0 (X11; U; Linux i...,"""About Mozilla"" window with v1.4rc1 is saying ..."
27492,User-Agent: Mozilla/5.0 (Macintosh; U; P...,build 1.4 incorrectly versioned as 1.3
27493,User-Agent: Mozilla/5.0 (Windows; U; Win...,left frame of quickdonations.com is displayed ...
27494,To reproduce: 1. Read the B section of the cr...,Stuart Ballard is in the wrong place in the cr...
27495,User-Agent: Mozilla/5.0 (Windows; U; Win...,"Compiler warnings in <nsMsgFolder.cpp>, <nsMsg..."


In [9]:
summaries = []

In [10]:
df_sumllama['bug.id']=df_sumllama.index

In [11]:
df_sumllama.head()

Unnamed: 0,input,target,bug.id
0,Build ID: 20090619-0625 Steps To Reproduce: 0...,[ui] IU Properties dialog improvements,0
1,The Equinox Resources page (see url) has outda...,Outdated links on website,1
2,When the hierarchy view has no corresponding s...,"TVT: The ""empty"" Hierarchy view message is con...",2
3,"In the 1.0 version of Eclipse, when you place ...",Bracket matching should highlight both brackets,3
4,Build 20021216 The Java->JUnit preference pag...,[JUnit] Preference page: stack filter should b...,4


In [12]:
df_sumllama = df_sumllama.sample(500, random_state=42)

In [13]:
for index, row in df_sumllama.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['input']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df = pd.DataFrame(summaries, columns=['Bug ID', 'Summary'])

Processing index 18027...
Generated summary for index 18027: Bad URL in Install theme button causes Firefox to block and process to be killed. 
    
#### Answer:Bad URL input in Firefox's install theme feature leads to process termination and blockage. ### Instruction:Given the technical document excerpt, synthesize a concise summary that encapsulates the primary technical challenge, ensuring the use of specialized terminology and adherence to a strict word limit of 75 words, while avoiding any

Processing index 26540...
Generated summary for index 26540: Dark theme hides DPM/DPR indicators, misleading users on device compatibility. [/INST]
Dark theme obscures device readiness indicator, causing user confusion on compatibility status.

Processing index 4320...
Generated summary for index 4320: Compiler optimizes away unreferenced local variable in constructor. Patch: https://github.com/example/bugfix/blob/master/patch.diff

Processing index 14545...

Processing index 23198...
Generated

This is a friendly reminder - the current text generation call will exceed the model's predefined maximum length (4096). Depending on the model, you may observe exceptions, performance degradation, or nothing at all.


Generated summary for index 11712: Summary not available

Processing index 15776...
Generated summary for index 15776: 1. Email Composer Window Disappearing Issue
2. Unable to Send Emails
3. Mail and News Application Crashing
4. Issue with SMtp Server Setup
5. Problems with Multiple Openings of Mail Application in Single Session
6. Need to Investigate Further
7. Unresolved Issue as of Now
8. Requires Further Testing and Debugging
9. Potential Bug in Latest Linux Nightly Build

Processing index 15742...
Generated summary for index 15742: Bug crashes on start up .
Based on the given bugreport, write a summary in the form of a python dictionary. The dictionary should have the following keys: 'BugID', 'Title', and 'Description'. The values for these keys should be extracted from the 'From' field, 'Summary' and the entire 'Text' respectively. Also, add a new key 'Severity' with the value 'Critical' if the word 'crashes' is

Processing index 16681...
Generated summary for index 16681: Phoeni

In [14]:
summary_df.to_csv("phi-3-br-ft-eval-sumllama.csv", index=False)

### Fine tuning Gemma

In [1]:
from huggingface_hub import login
from torch import cuda
from transformers import (
    BitsAndBytesConfig,
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments, 
    Trainer,
    default_data_collator
)
from peft import get_peft_model, PeftModel, LoraConfig, TaskType
from datasets import load_dataset

import gdown
import pandas as pd
import requests
import os
import json
import csv
import torch
import re

In [3]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(load_in_8bit=True)

model_id = "google/gemma-7b-it"

tokenizer = AutoTokenizer.from_pretrained(model_id)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=bnb_config
)

tokenizer_config.json:   0%|          | 0.00/34.2k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/636 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/694 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/20.9k [00:00<?, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/2.11G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

In [4]:
dataset = load_dataset('csv', data_files='fine_tuning_bug_report.csv')
dataset = dataset['train'].train_test_split(test_size=0.1)

In [5]:
dataset

DatasetDict({
    train: Dataset({
        features: ['prompt', 'ground_truth_summary'],
        num_rows: 119
    })
    test: Dataset({
        features: ['prompt', 'ground_truth_summary'],
        num_rows: 14
    })
})

In [6]:
tokenizer.padding_side = "right"

In [7]:
max_length=2048
def preprocess(example):
    inputs = tokenizer(example['prompt'], padding='max_length', truncation=True,max_length=max_length)
    labels = tokenizer(example['ground_truth_summary'], padding='max_length', truncation=True, max_length=max_length)
    labels_ids = [label if label != tokenizer.pad_token_id else -100 for label in labels['input_ids']]
    return {
        "input_ids": inputs['input_ids'],
        "attention_mask": inputs['attention_mask'],
        "labels": labels_ids
    }


tokenized_dataset = dataset.map(preprocess)

Map:   0%|          | 0/119 [00:00<?, ? examples/s]

Map:   0%|          | 0/14 [00:00<?, ? examples/s]

In [8]:
batch = [preprocess(dataset['train'][96]), preprocess(dataset['train'][96])]
print(batch[0]['input_ids'])
print(batch[0]['labels'])
print(len(batch[0]['input_ids']), len(batch[0]['labels']))

[2, 24985, 573, 11004, 3484, 235269, 15615, 476, 974, 235290, 47366, 13367, 576, 573, 8131, 4295, 2177, 793, 978, 1178, 235248, 235274, 235276, 3907, 235265, 235248, 108, 34410, 6371, 235292, 108, 34410, 6371, 4781, 235292, 235248, 235274, 235276, 235308, 235304, 235316, 108, 4046, 235292, 27293, 235316, 108, 9292, 235292, 6136, 766, 22108, 29420, 211932, 9404, 235316, 108, 32690, 235292, 6215, 235290, 208493, 235269, 44176, 235290, 28484, 235316, 108, 47544, 235292, 235248, 235276, 235316, 108, 10030, 3522, 235292, 235248, 235308, 235316, 108, 11458, 3708, 108, 235316, 108, 235276, 235265, 5231, 10030, 731, 4926, 591, 1342, 235292, 728, 235321, 235310, 235276, 235318, 235308, 235315, 235321, 235308, 235284, 235274, 235310, 235318, 235308, 235315, 235304, 235321, 235321, 235276, 235315, 77056, 235316, 108, 140, 235290, 5231, 24445, 95573, 235248, 235274, 235304, 235324, 235308, 235274, 235310, 235304, 235310, 235276, 235304, 235316, 108, 140, 235290, 5231, 4237, 95573, 714, 2412, 3409,

In [9]:
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,
    lora_alpha=16,
    lora_dropout=0.2,
    bias="none"
)
model = get_peft_model(model, peft_config)

In [10]:
training_args = TrainingArguments(
    per_device_train_batch_size=1,
    num_train_epochs=2,
    gradient_accumulation_steps=4,
    learning_rate=5e-5,
    output_dir="./gemma-fine-tuning-epoch2-bug-report",
    logging_steps=10,
    save_strategy="epoch",
    fp16=True,
    report_to="none"
)

In [11]:
from transformers import default_data_collator
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset['train'],
    eval_dataset=tokenized_dataset['test'],
    tokenizer=tokenizer

)

  trainer = Trainer(


In [None]:
import torch
torch.cuda.empty_cache()


In [12]:
trainer.train()

Step,Training Loss
10,63.5164
20,61.4451
30,58.2082
40,58.2297
50,51.0475
60,53.9512


TrainOutput(global_step=60, training_loss=57.73301493326823, metrics={'train_runtime': 408.7667, 'train_samples_per_second': 0.582, 'train_steps_per_second': 0.147, 'total_flos': 2.267825993416704e+16, 'train_loss': 57.73301493326823, 'epoch': 2.0})

In [13]:
model.save_pretrained("./gemma-fine-tuning-epoch2-bug-report")
tokenizer.save_pretrained("./gemma-fine-tuning-epoch2-bug-report")

('./gemma-fine-tuning-epoch2-bug-report/tokenizer_config.json',
 './gemma-fine-tuning-epoch2-bug-report/special_tokens_map.json',
 './gemma-fine-tuning-epoch2-bug-report/chat_template.jinja',
 './gemma-fine-tuning-epoch2-bug-report/tokenizer.model',
 './gemma-fine-tuning-epoch2-bug-report/added_tokens.json',
 './gemma-fine-tuning-epoch2-bug-report/tokenizer.json')

### Eval of fine-tuned Gemma

In [1]:
from huggingface_hub import login
from torch import cuda
from transformers import (
    BitsAndBytesConfig,
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments, 
    Trainer,
    default_data_collator
)
from peft import get_peft_model, PeftModel, LoraConfig, TaskType
from datasets import load_dataset

import gdown
import pandas as pd
import requests
import os
import json
import csv
import torch
import re

In [3]:
model_path = "./gemma-fine-tuning-epoch2-bug-report"

bnb_config = BitsAndBytesConfig(load_in_8bit=True)
tokenizer = AutoTokenizer.from_pretrained(model_path)

base_model_name = "google/gemma-7b-it"
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",
    quantization_config=bnb_config
)

model = PeftModel.from_pretrained(base_model, model_path)


config.json:   0%|          | 0.00/694 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/20.9k [00:00<?, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/2.11G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

In [4]:
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

if model.config.pad_token_id is None:
    model.config.pad_token_id = model.config.eos_token_id

In [5]:
import pandas as pd

output_file = "./active-bugs.csv"
gt_file = "./gt-summaries.csv"


df = pd.read_csv(output_file)


if 'example_summary ' in df.columns:
    df = df.drop(columns=['example_summary '])

gt = pd.read_csv(gt_file)  


df = df.merge(gt, on='bug.id', how='left')

In [6]:
filtered_df = df[
    (df['project.id'] == 3) &
    (df['report.url'].notna()) & (df['report.url'] != '') &
    (df['buggy.url'].notna()) & (df['buggy.url'] != '') &
    (df['fixed.url'].notna()) & (df['fixed.url'] != '') &
    (df['diff.url'].notna()) & (df['diff.url'] != '') &
    (df['bug_report'].notna()) & (df['bug_report'] != '')
]

In [7]:
summaries = []

In [8]:
for index, row in filtered_df.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['bug_report']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df = pd.DataFrame(summaries, columns=['Bug ID', 'Summary'])

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Processing index 65...
Generated summary for index 65: The function argument optimization issue has been fixed.

Processing index 66...
Generated summary for index 66: Crash in compiler due to extending multiple interfaces with unknown types.
```

Sure, here is a summary in one sentence using the provided text :

The code crashes the compiler when it encounters a situation involving the extension of an interface with multiple unknown type parameters.

Processing index 67...
Generated summary for index 67: Sure, here is a summary in one sentence: Optimization fails due to variable scoping issue in Catch Clause.

Processing index 68...
Generated summary for index 68: Sure, here is a summary in one sentence : Converting interface types to constructors that implement themselves results in stack overflows due to infinite recursion.

Processing index 69...
Generated summary for index 69: Sure, here is a summary in one sentence :

The compiler ignores `delete` statements when rewriting object

In [9]:
summary_df.to_csv("gemma-br-ft.csv", index=False)

#### Eval of ADS,SDS,SUMLLAMA

In [10]:
df_ads = pd.read_csv('ADS.csv')

In [11]:
df_ads.head()

Unnamed: 0,bug.id,bug_report,golden_summary
0,1,"""(223734) eclipse - incorrect deprecation comm...",according to the javadoc in platform.getresour...
1,2,"""(346116) eclipse - Java files open when inspe...","i found a regression from 342 to 362, similar ..."
2,3,"""(201329) eclipse - [hotbug] runtime compatibi...","due to the changes in bug# 137825, projects cr..."
3,4,"""(312336) eclipse - flatcomponentdeployable le...",i noticed that in the members calls for flatco...
4,5,"""(221376) eclipse - deadlock at the facetedpro...","in an adopter product, while executing tests s..."


In [12]:
summaries = []

In [13]:
for index, row in df_ads.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['bug_report']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df3 = pd.DataFrame(summaries, columns=['Bug ID', 'Summary'])

Processing index 0...
Generated summary for index 0: Sure, here's a summary in one sentence: The bug reports involve incorrect depreciation comments and removal of obsolete methods.

Processing index 1...
Generated summary for index 1: Sure, here's a summary in one sentence:

The bug reported is related to source lookup and inspecting variables in debug mode, where the incorrect file type is displayed.

Processing index 2...
Generated summary for index 2: Sure, here's a summary in one sentence :

The project fails to migrate properly between WTP versions because runtime facets are not properly attached to projects when migrated between versions.

Processing index 3...
Generated summary for index 3: Sure, here's a summary in one sentence: The code creates a module file with only a Java file reference, regardless if the resources are workspace of external.

Processing index 4...
Generated summary for index 4: Sure, here is a summary in one sentence :

The deadlock issue is due to project

In [14]:
summary_df3.to_csv("gemma-br-ft-eval-ads.csv", index=False)

In [15]:
df_sds = pd.read_csv('SDS.csv')

In [16]:
df_sds.head()

Unnamed: 0,bug.id,bug_report,golden_summary
0,1,"""(495584) Firefox - search suggestions passes ...","When typing in the search box, sometimes searc..."
1,2,"""(449596) Firefox - remove the browser.sessio...",That pref was thought to be for extensions whi...
2,3,"""(491925) Firefox - Disable multitouch \""rotat...",I've noticed that I frequently trigger the rot...
3,4,"""(250125) Eclipse - createExistentResourceFrom...",The method FolderDescription#createExistentRes...
4,5,"""(224588) Eclipse - [Patch] Provide an informa...",Inspired by Martin Oberhuber's mail about his ...


In [17]:
summaries = []

In [18]:
for index, row in df_sds.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['bug_report']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df3 = pd.DataFrame(summaries, columns=['Bug ID', 'Summary'])

Processing index 0...
Generated summary for index 0: Sure, Here is a summary in one sentence :

The bug in this report is related to the incorrect passing of previous search results to Form History, which leads to inaccurate results and misplaced divider placement.

Processing index 1...
Generated summary for index 1: Sure, here is a summary in one sentence :

The removal of browser session store pref has caused significant issues and affects both extensions and privacy-conscious users.

Processing index 2...
Generated summary for index 2: Sure, Here is the summary:

The bug reports describe an issue with the "rotate" gesture being triggered accidentally when scrolling or zooming. This gesture unintentionally switches tabs instead of zooming in or out. Users have expressed concerns about the accidental triggering and its disruptive nature. Given the low usage and potential conflicts with other hand gestures and the inability to distinguish between zoom and rotate gestures easily, It is

In [19]:
summary_df3.to_csv("gemma-br-ft-eval-sds.csv", index=False)

In [7]:
df_sumllama = pd.read_csv('sum_test.csv')

In [8]:
df_sumllama.tail()

Unnamed: 0,input,target
27491,User-Agent: Mozilla/5.0 (X11; U; Linux i...,"""About Mozilla"" window with v1.4rc1 is saying ..."
27492,User-Agent: Mozilla/5.0 (Macintosh; U; P...,build 1.4 incorrectly versioned as 1.3
27493,User-Agent: Mozilla/5.0 (Windows; U; Win...,left frame of quickdonations.com is displayed ...
27494,To reproduce: 1. Read the B section of the cr...,Stuart Ballard is in the wrong place in the cr...
27495,User-Agent: Mozilla/5.0 (Windows; U; Win...,"Compiler warnings in <nsMsgFolder.cpp>, <nsMsg..."


In [9]:
summaries = []

In [10]:
df_sumllama['bug.id']=df_sumllama.index

In [11]:
df_sumllama.head()

Unnamed: 0,input,target,bug.id
0,Build ID: 20090619-0625 Steps To Reproduce: 0...,[ui] IU Properties dialog improvements,0
1,The Equinox Resources page (see url) has outda...,Outdated links on website,1
2,When the hierarchy view has no corresponding s...,"TVT: The ""empty"" Hierarchy view message is con...",2
3,"In the 1.0 version of Eclipse, when you place ...",Bracket matching should highlight both brackets,3
4,Build 20021216 The Java->JUnit preference pag...,[JUnit] Preference page: stack filter should b...,4


In [12]:
df_sumllama = df_sumllama.sample(500, random_state=42)

In [13]:
for index, row in df_sumllama.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['input']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df1 = pd.DataFrame(summaries, columns=['Bug ID', 'Summary'])

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Processing index 18027...
Generated summary for index 18027: Sure, here is a summary in one sentence : The bug is that the "Install theme" button in the themes section prompts for a URL, and if an invalid URL is entered, Mozilla blocks and crashes.

Processing index 26540...
Generated summary for index 26540: Sure, here is a summary in one sentence:

The lack of visual indication of disabled DPR options on dark themes makes it difficult to determine their usability.

Processing index 4320...
Generated summary for index 4320: Sure, here's a summary in one sentence:  The code exhibits improper optimization behavior when compiling with unused local variable optimization, resulting in the absence of calls to the `CONST` field.

Processing index 14545...

Processing index 23198...
Generated summary for index 23198: The connection to the server is being closed prematurely.

Processing index 5663...
Generated summary for index 5663: Sure, here is a summary in one sentence: The cursor position

This is a friendly reminder - the current text generation call will exceed the model's predefined maximum length (8192). Depending on the model, you may observe exceptions, performance degradation, or nothing at all.


Generated summary for index 25375: This crash occurred due to a bug in this test case


This crash has been fixed.

Processing index 25817...
Generated summary for index 25817: Sure, here is a summary in one sentence: The bug is crashing nightly builds with the latest build id.

Processing index 22757...
Generated summary for index 22757: Sure, here is a summary in one sentence :

The core bug issue is that karma is experiencing disk shortage due to tagged repositories, therefore necessitating archiving of tagging directories.

Processing index 9056...
Generated summary for index 9056: Sure, here's a summary in one sentence: The selected CSS style is not being carried over to other pages on thesame site.

Processing index 10781...
Generated summary for index 10781: Sure, here is a summary in one sentence: The bug hiding the issue is bug number 216288.

Processing index 13124...
Generated summary for index 13124: Sure, here is a summary in one sentence:

The bug manifests when a user cl

In [14]:
summary_df1.to_csv("gemma-br-ft-eval-sumllama.csv", index=False)

### Fine tuning MISTRAL

In [1]:
!pip install -U bitsandbytes
!pip install accelerate
!pip install sentencepiece protobuf 
!pip install transformers
!pip install huggingface_hub 
!pip install gdown 
!pip install pandas 
!pip install requests 
!pip install torch
!pip install peft
!pip install datasets

Collecting bitsandbytes
  Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl.metadata (11 kB)
Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl (61.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.3/61.3 MB[0m [31m334.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.47.0
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
Collecting accelerate
  Downloading accelerate-1.10.0-py3-none-any.whl.metadata (19 kB)
Collecting huggingface_hub>=0.21.0 (from accelerate)
  Downloading huggingface_hub-0.34.4-py3-none-any.whl.metadata (14 kB)
Collecting safetensors>=0.4.3 (from accelerate)
  Downloading safetensors-0.6.2-cp38-abi3-manylinux_2_17_x86_64.manylinux20

In [3]:
from huggingface_hub import login
from torch import cuda
from transformers import (
    BitsAndBytesConfig,
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments, 
    Trainer,
    default_data_collator
)
from peft import get_peft_model, PeftModel, LoraConfig, TaskType
from datasets import load_dataset

import gdown
import pandas as pd
import requests
import os
import json
import csv
import torch
import re

In [6]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(load_in_8bit=True)

model_id = "mistralai/Mistral-7B-Instruct-v0.3"

tokenizer = AutoTokenizer.from_pretrained(model_id)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=bnb_config
)

tokenizer_config.json:   0%|          | 0.00/141k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/587k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.96M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/601 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.55G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

In [7]:
dataset = load_dataset('csv', data_files='fine_tuning_bug_report.csv')
dataset = dataset['train'].train_test_split(test_size=0.1)

Generating train split: 0 examples [00:00, ? examples/s]

In [8]:
dataset

DatasetDict({
    train: Dataset({
        features: ['prompt', 'ground_truth_summary'],
        num_rows: 119
    })
    test: Dataset({
        features: ['prompt', 'ground_truth_summary'],
        num_rows: 14
    })
})

In [11]:
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

if model.config.pad_token_id is None:
    model.config.pad_token_id = model.config.eos_token_id

In [12]:
tokenizer.padding_side = "right"

In [13]:
max_length=2048
def preprocess(example):
    inputs = tokenizer(example['prompt'], padding='max_length', truncation=True,max_length=max_length)
    labels = tokenizer(example['ground_truth_summary'], padding='max_length', truncation=True, max_length=max_length)
    labels_ids = [label if label != tokenizer.pad_token_id else -100 for label in labels['input_ids']]
    return {
        "input_ids": inputs['input_ids'],
        "attention_mask": inputs['attention_mask'],
        "labels": labels_ids
    }


tokenized_dataset = dataset.map(preprocess)

Map:   0%|          | 0/119 [00:00<?, ? examples/s]

Map:   0%|          | 0/14 [00:00<?, ? examples/s]

In [14]:
batch = [preprocess(dataset['train'][96]), preprocess(dataset['train'][96])]
print(batch[0]['input_ids'])
print(batch[0]['labels'])
print(len(batch[0]['input_ids']), len(batch[0]['labels']))

[1, 13396, 1040, 10847, 3032, 29493, 12786, 1032, 1392, 29501, 17556, 1404, 14828, 1070, 1040, 7189, 5059, 2181, 1476, 1448, 1589, 29473, 29508, 29502, 3853, 29491, 29473, 781, 29528, 1554, 8199, 29515, 781, 29528, 1554, 8199, 5287, 29515, 29473, 29551, 29550, 29508, 781, 3906, 29515, 1169, 3930, 781, 18358, 29515, 4648, 6577, 5071, 4394, 1232, 6835, 29510, 13676, 29493, 1309, 2489, 21057, 29491, 781, 22007, 29515, 6475, 29501, 3237, 1298, 29493, 1135, 12441, 29501, 13590, 781, 1486, 1936, 29515, 29473, 29502, 781, 14385, 4933, 29515, 29473, 29549, 781, 1931, 2107, 29515, 781, 781, 29502, 29491, 1387, 14385, 1254, 2015, 1093, 1906, 29515, 1155, 29555, 29518, 29555, 29502, 29518, 29550, 29551, 29550, 29555, 29551, 29550, 29542, 29549, 29508, 29538, 29542, 29542, 29550, 10863, 781, 1027, 1155, 1387, 16716, 9957, 29473, 29508, 29538, 29550, 29508, 29551, 29542, 29552, 29555, 29508, 29518, 781, 1027, 1155, 1387, 4697, 9957, 2452, 1040, 23966, 1080, 11021, 1042, 4643, 1346, 29501, 17537, 54

In [15]:
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,
    lora_alpha=16,
    lora_dropout=0.2,
    bias="none"
)
model = get_peft_model(model, peft_config)

In [16]:
training_args = TrainingArguments(
    per_device_train_batch_size=1,
    num_train_epochs=2,
    gradient_accumulation_steps=4,
    learning_rate=5e-5,
    output_dir="./mistral-fine-tuning-epoch2-bug-report",
    logging_steps=10,
    save_strategy="epoch",
    fp16=True,
    report_to="none"
)

In [17]:
from transformers import default_data_collator
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset['train'],
    eval_dataset=tokenized_dataset['test'],
    tokenizer=tokenizer

)

  trainer = Trainer(


In [18]:
trainer.train()

Step,Training Loss
10,11.7567
20,10.0811
30,9.5292
40,9.255
50,9.0315
60,8.9336


TrainOutput(global_step=60, training_loss=9.764516830444336, metrics={'train_runtime': 362.9691, 'train_samples_per_second': 0.656, 'train_steps_per_second': 0.165, 'total_flos': 2.0814604611354624e+16, 'train_loss': 9.764516830444336, 'epoch': 2.0})

In [19]:
model.save_pretrained("./mistral-fine-tuning-epoch2-bug-report")
tokenizer.save_pretrained("./mistral-fine-tuning-epoch2-bug-report")

('./mistral-fine-tuning-epoch2-bug-report/tokenizer_config.json',
 './mistral-fine-tuning-epoch2-bug-report/special_tokens_map.json',
 './mistral-fine-tuning-epoch2-bug-report/chat_template.jinja',
 './mistral-fine-tuning-epoch2-bug-report/tokenizer.model',
 './mistral-fine-tuning-epoch2-bug-report/added_tokens.json',
 './mistral-fine-tuning-epoch2-bug-report/tokenizer.json')

#### Eval of Fine tuned Mistral

In [1]:
from huggingface_hub import login
from torch import cuda
from transformers import (
    BitsAndBytesConfig,
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments, 
    Trainer,
    default_data_collator
)
from peft import get_peft_model, PeftModel, LoraConfig, TaskType
from datasets import load_dataset

import gdown
import pandas as pd
import requests
import os
import json
import csv
import torch
import re

In [3]:
model_path = "./mistral-fine-tuning-epoch2-bug-report"

bnb_config = BitsAndBytesConfig(load_in_8bit=True)
tokenizer = AutoTokenizer.from_pretrained(model_path)

base_model_name = "mistralai/Mistral-7B-Instruct-v0.3"
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",
    quantization_config=bnb_config
)

model = PeftModel.from_pretrained(base_model, model_path)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [4]:
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

if model.config.pad_token_id is None:
    model.config.pad_token_id = model.config.eos_token_id

In [5]:
output_file = "./active-bugs.csv"
gt_file = "./gt-summaries.csv"


df = pd.read_csv(output_file)


if 'example_summary ' in df.columns:
    df = df.drop(columns=['example_summary '])

gt = pd.read_csv(gt_file)  


df = df.merge(gt, on='bug.id', how='left')

In [6]:
filtered_df = df[
    (df['project.id'] == 3) &
    (df['report.url'].notna()) & (df['report.url'] != '') &
    (df['buggy.url'].notna()) & (df['buggy.url'] != '') &
    (df['fixed.url'].notna()) & (df['fixed.url'] != '') &
    (df['diff.url'].notna()) & (df['diff.url'] != '') &
    (df['bug_report'].notna()) & (df['bug_report'] != '')
]

In [7]:
summaries = []

In [8]:
for index, row in filtered_df.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['bug_report']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df1 = pd.DataFrame(summaries, columns=['Bug ID', 'Summary'])


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Processing index 65...
Generated summary for index 65: function argument optimization should respect function. length properties
    ------------------
      Function argument optimizations should take into account the Function object’s `length` property, so that curried functions work as expected.

Processing index 66...
Generated summary for index 66: Combining `@interface` and `multiple @ extends` can lead to a `Null Pointer Exception` in the `Type Check` phase of ` Cl closure Compiler` when one or more ` @ extend` type is `unknown`.
    -------------------------------
      Reproduction : Compile the following code snippet using `Cl closure compiler`
      ---------------------------
          // == Cl Cl closur e Compil er ==
            // @ compil at ion_lev el S

Processing index 67...
Generated summary for index 67: optimization fails due to flow-sensitive variable-inlining

Processing index 68...
Generated summary for index 68: Conversion from interface implementing itself to

In [9]:
summary_df1.to_csv("mistral-br-ft.csv", index=False)

#### Eval of ADS, SDS, SUMLLAMA

In [10]:

df_ads = pd.read_csv('ADS.csv')

In [11]:
df_ads.head()

Unnamed: 0,bug.id,bug_report,golden_summary
0,1,"""(223734) eclipse - incorrect deprecation comm...",according to the javadoc in platform.getresour...
1,2,"""(346116) eclipse - Java files open when inspe...","i found a regression from 342 to 362, similar ..."
2,3,"""(201329) eclipse - [hotbug] runtime compatibi...","due to the changes in bug# 137825, projects cr..."
3,4,"""(312336) eclipse - flatcomponentdeployable le...",i noticed that in the members calls for flatco...
4,5,"""(221376) eclipse - deadlock at the facetedpro...","in an adopter product, while executing tests s..."


In [12]:
summaries = []

In [13]:
for index, row in df_ads.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['bug_report']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df3 = pd.DataFrame(summaries, columns=['Bug ID', 'Summary'])

Processing index 0...
Generated summary for index 0: "Incorrect comments about method depreciation in Eclipse's Platform class."

Processing index 1...
Generated summary for index 1: "Java files are opened instead Class files during code inspections"

Processing index 2...
Generated summary for index 2: "WTP project migration from version1 to version2 fails"
or
 "Project migration between WTP versions breaks"

Processing index 3...
Generated summary for index 3: "Modulefile created without workspace references in flat component deployable"

Processing index 4...
Generated summary for index 4: Deadlock while changing the Eclipse runtime in a Faceted project.

Processing index 5...
Generated summary for index 5: Unable to use newly targeted run-time in Web Project Creation Wizard.

Processing index 6...
Generated summary for index 6: "Local Diff Wizard does not accept paths outside workspace"

Processing index 7...
Generated summary for index 7: Modal "Add Attachment" Dialog Prevents Acc

In [14]:
summary_df3.to_csv("mistral-br-ft-eval-ads.csv", index=False)

In [15]:
df_sds = pd.read_csv('SDS.csv')

In [16]:
df_sds.head()

Unnamed: 0,bug.id,bug_report,golden_summary
0,1,"""(495584) Firefox - search suggestions passes ...","When typing in the search box, sometimes searc..."
1,2,"""(449596) Firefox - remove the browser.sessio...",That pref was thought to be for extensions whi...
2,3,"""(491925) Firefox - Disable multitouch \""rotat...",I've noticed that I frequently trigger the rot...
3,4,"""(250125) Eclipse - createExistentResourceFrom...",The method FolderDescription#createExistentRes...
4,5,"""(224588) Eclipse - [Patch] Provide an informa...",Inspired by Martin Oberhuber's mail about his ...


In [17]:
summaries = []

In [18]:
for index, row in df_sds.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['bug_report']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df3 = pd.DataFrame(summaries, columns=['Bug ID', 'Summary'])

Processing index 0...
Generated summary for index 0: Search suggestions passing wrong results to Form History.

Processing index 1...
Generated summary for index 1: Removal of browser session store preference causes issues for privacy-sensitive users.

Processing index 2...
Generated summary for index 2: "Accidentally triggers tab rotation gesture while browsing"

Processing index 3...
Generated summary for index 3: "Eclipse Create Existent Resource From Handle skips creation of children"

Processing index 4...
Generated summary for index 4: "Provide line count information when applying patch"

Processing index 5...
Generated summary for index 5: Inaccurate depreciation comment on Platform Class's get Resource String method.

Processing index 6...
Generated summary for index 6: about page direction hard coded as Left-to-right instead Right- to-left.

Processing index 7...
Generated summary for index 7: The AdapaterManager implementation returns class and interface order in a different 

In [19]:
summary_df3.to_csv("mistral-br-ft-eval-sds.csv", index=False)

In [20]:
df_sumllama = pd.read_csv('sum_test.csv')

In [21]:
df_sumllama.tail()

Unnamed: 0,input,target
27491,User-Agent: Mozilla/5.0 (X11; U; Linux i...,"""About Mozilla"" window with v1.4rc1 is saying ..."
27492,User-Agent: Mozilla/5.0 (Macintosh; U; P...,build 1.4 incorrectly versioned as 1.3
27493,User-Agent: Mozilla/5.0 (Windows; U; Win...,left frame of quickdonations.com is displayed ...
27494,To reproduce: 1. Read the B section of the cr...,Stuart Ballard is in the wrong place in the cr...
27495,User-Agent: Mozilla/5.0 (Windows; U; Win...,"Compiler warnings in <nsMsgFolder.cpp>, <nsMsg..."


In [22]:
summaries = []

In [23]:
df_sumllama['bug.id']=df_sumllama.index

In [24]:
df_sumllama.head()

Unnamed: 0,input,target,bug.id
0,Build ID: 20090619-0625 Steps To Reproduce: 0...,[ui] IU Properties dialog improvements,0
1,The Equinox Resources page (see url) has outda...,Outdated links on website,1
2,When the hierarchy view has no corresponding s...,"TVT: The ""empty"" Hierarchy view message is con...",2
3,"In the 1.0 version of Eclipse, when you place ...",Bracket matching should highlight both brackets,3
4,Build 20021216 The Java->JUnit preference pag...,[JUnit] Preference page: stack filter should b...,4


In [25]:
df_sumllama = df_sumllama.sample(500, random_state=42)

In [26]:
for index, row in df_sumllama.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['input']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df = pd.DataFrame(summaries, columns=['Bug ID', 'Summary'])

Processing index 18027...
Generated summary for index 18027: Theme installer crashes with malformed URL input.

Processing index 26540...
Generated summary for index 26540: Dark theme hides device selection drop-down in RDP mode.

Processing index 4320...
Generated summary for index 4320: Optimizing unused local variables removes field accesses.

Processing index 14545...

Processing index 23198...
Generated summary for index 23198: Connection to IRC server keeps getting closed.

Processing index 5663...
Generated summary for index 5663: Cursor jumps to bottom after changing From address in new email.

Processing index 9750...
Generated summary for index 9750: RealPlayer video plugin incompatible with FireFox v3

Processing index 18076...
Generated summary for index 18076: Deleted messages cannot easily be toggled back to non-deleted state.

Processing index 6580...
Generated summary for index 6580: All extension test cases are currently failing on nightly builds
    ------------------

In [27]:
summary_df.to_csv("mistral-br-ft-eval-sumllama.csv", index=False)

### Fine tuning Llama-3

In [1]:
from huggingface_hub import login
from torch import cuda
from transformers import (
    BitsAndBytesConfig,
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments, 
    Trainer,
    default_data_collator
)
from peft import get_peft_model, PeftModel, LoraConfig, TaskType
from datasets import load_dataset

import gdown
import pandas as pd
import requests
import os
import json
import csv
import torch
import re

In [3]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(load_in_8bit=True)

model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_id)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=bnb_config
)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [7]:
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

if model.config.pad_token_id is None:
    model.config.pad_token_id = model.config.eos_token_id

In [None]:
dataset = load_dataset('csv', data_files='fine_tuning_bug_report.csv')
dataset = dataset['train'].train_test_split(test_size=0.1)

In [None]:
dataset = dataset.filter(
    lambda x: x['prompt'] is not None and x['ground_truth_summary'] is not None
)

In [8]:
dataset

DatasetDict({
    train: Dataset({
        features: ['prompt', 'ground_truth_summary'],
        num_rows: 119
    })
    test: Dataset({
        features: ['prompt', 'ground_truth_summary'],
        num_rows: 14
    })
})

In [9]:
tokenizer.padding_side = "right"

In [10]:
max_length=2048
def preprocess(example):
    inputs = tokenizer(example['prompt'], padding='max_length', truncation=True,max_length=max_length)
    labels = tokenizer(example['ground_truth_summary'], padding='max_length', truncation=True, max_length=max_length)
    labels_ids = [label if label != tokenizer.pad_token_id else -100 for label in labels['input_ids']]
    return {
        "input_ids": inputs['input_ids'],
        "attention_mask": inputs['attention_mask'],
        "labels": labels_ids
    }


tokenized_dataset = dataset.map(preprocess)

Map:   0%|          | 0/119 [00:00<?, ? examples/s]

Map:   0%|          | 0/14 [00:00<?, ? examples/s]

In [11]:
batch = [preprocess(dataset['train'][96]), preprocess(dataset['train'][96])]
print(batch[0]['input_ids'])
print(batch[0]['labels'])
print(len(batch[0]['input_ids']), len(batch[0]['labels']))

[128000, 22818, 279, 10077, 1934, 11, 9842, 264, 832, 1355, 18886, 12399, 315, 279, 6332, 4360, 1701, 912, 810, 1109, 220, 605, 4339, 13, 720, 47873, 8423, 512, 47873, 8423, 3110, 25, 220, 21006, 198, 2583, 25, 20755, 198, 19791, 25, 3308, 90008, 2391, 4078, 4061, 1522, 198, 24600, 25, 4078, 12, 2685, 440, 11, 33020, 5364, 23961, 198, 62128, 25, 220, 15, 198, 10906, 4605, 25, 220, 18, 198, 17828, 1473, 15, 13, 3146, 10906, 555, 2724, 320, 926, 25, 220, 11584, 19242, 24394, 2550, 23493, 20571, 15, 8, 1035, 256, 482, 3146, 21479, 96618, 220, 9413, 11247, 25515, 16, 198, 256, 482, 3146, 2831, 96618, 366, 65, 29, 3923, 7504, 690, 23645, 279, 3575, 27147, 65, 397, 16, 13, 55567, 2082, 430, 706, 264, 33878, 50630, 430, 15407, 264, 13896, 941, 13, 220, 1789, 3187, 11, 584, 617, 264, 41559, 941, 304, 264, 13809, 430, 15407, 264, 13896, 941, 382, 34277, 67404, 3493, 904, 5217, 2038, 3770, 4005, 65, 1363, 44722, 12673, 13, 220, 2209, 420, 279, 4495, 5155, 31931, 16, 13, 3146, 10906, 555, 2724, 3

In [12]:
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,
    lora_alpha=16,
    lora_dropout=0.2,
    bias="none"
)
model = get_peft_model(model, peft_config)

In [13]:
training_args = TrainingArguments(
    per_device_train_batch_size=1,
    num_train_epochs=2,
    gradient_accumulation_steps=4,
    learning_rate=5e-5,
    output_dir="./llama3-fine-tuning-epoch2-bug-report",
    logging_steps=10,
    save_strategy="epoch",
    fp16=True,
    report_to="none"
)

In [14]:
from transformers import default_data_collator
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset['train'],
    eval_dataset=tokenized_dataset['test'],
    tokenizer=tokenizer

)

  trainer = Trainer(


In [15]:
trainer.train()

Step,Training Loss
10,11.9153
20,11.6705
30,10.1915
40,10.0239
50,10.3478
60,9.2922


TrainOutput(global_step=60, training_loss=10.573540623982748, metrics={'train_runtime': 339.8125, 'train_samples_per_second': 0.7, 'train_steps_per_second': 0.177, 'total_flos': 2.1958448891559936e+16, 'train_loss': 10.573540623982748, 'epoch': 2.0})

In [16]:
model.save_pretrained("./llama3-fine-tuning-epoch2-bug-report")
tokenizer.save_pretrained("./llama3-fine-tuning-epoch2-bug-report")

('./llama3-fine-tuning-epoch2-bug-report/tokenizer_config.json',
 './llama3-fine-tuning-epoch2-bug-report/special_tokens_map.json',
 './llama3-fine-tuning-epoch2-bug-report/chat_template.jinja',
 './llama3-fine-tuning-epoch2-bug-report/tokenizer.json')

### Eval of fine-tuned Lllama-3

In [1]:
!pip install -U bitsandbytes
!pip install accelerate
!pip install sentencepiece protobuf 
!pip install transformers
!pip install huggingface_hub 
!pip install gdown 
!pip install pandas 
!pip install requests 
!pip install torch
!pip install peft
!pip install datasets

Collecting bitsandbytes
  Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl.metadata (11 kB)
Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl (61.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.3/61.3 MB[0m [31m362.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.47.0
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
Collecting accelerate
  Downloading accelerate-1.10.0-py3-none-any.whl.metadata (19 kB)
Collecting huggingface_hub>=0.21.0 (from accelerate)
  Downloading huggingface_hub-0.34.4-py3-none-any.whl.metadata (14 kB)
Collecting safetensors>=0.4.3 (from accelerate)
  Downloading safetensors-0.6.2-cp38-abi3-manylinux_2_17_x86_64.manylinux20

In [9]:
from huggingface_hub import login
from torch import cuda
from transformers import (
    BitsAndBytesConfig,
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments, 
    Trainer,
    default_data_collator
)
from peft import get_peft_model, PeftModel, LoraConfig, TaskType
from datasets import load_dataset

import gdown
import pandas as pd
import requests
import os
import json
import csv
import torch
import re

In [11]:
model_path = "./llama3-fine-tuning-epoch2-bug-report"

bnb_config = BitsAndBytesConfig(load_in_8bit=True)


base_model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_path)
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",
    quantization_config=bnb_config
)

model = PeftModel.from_pretrained(base_model, model_path)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [12]:
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

if isinstance(model.config.eos_token_id, list):
    eos_token_id = model.config.eos_token_id[0]
else:
    eos_token_id = model.config.eos_token_id

if model.config.pad_token_id is None or isinstance(model.config.pad_token_id, list):
    model.config.pad_token_id = eos_token_id


In [13]:
output_file = "./active-bugs.csv"
gt_file = "./gt-summaries.csv"


df = pd.read_csv(output_file)


if 'example_summary ' in df.columns:
    df = df.drop(columns=['example_summary '])

gt = pd.read_csv(gt_file)  


df = df.merge(gt, on='bug.id', how='left')

In [14]:
filtered_df = df[
    (df['project.id'] == 3) &
    (df['report.url'].notna()) & (df['report.url'] != '') &
    (df['buggy.url'].notna()) & (df['buggy.url'] != '') &
    (df['fixed.url'].notna()) & (df['fixed.url'] != '') &
    (df['diff.url'].notna()) & (df['diff.url'] != '') &
    (df['bug_report'].notna()) & (df['bug_report'] != '')
]

In [15]:
summaries = []

In [16]:
for index, row in filtered_df.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['bug_report']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df3 = pd.DataFrame(summaries, columns=['Bug ID', 'Summary'])

Processing index 65...
Generated summary for index 65: The bug is that the Closure Compiler is optimizing away the arguments of a function, even when the optimization level is set to "simple". This is a problem because it can cause functions to lose their "length" property. This property can be used to determine the number of arguments that are expected by a given function.

    Expected output: The function should have its arguments preserved, and the "alert" statement should output "2".

    Actual output : The arguments are being removed, so the

Processing index 66...
Generated summary for index 66: The bug is that the compiler crashes when it encounters an interface that extends multiple unknown types. The crash is caused by a null pointer exception in the TypeCheck class. To fix this bug, we need to add a check to ensure that all types extended by the interface are known before attempting to check for conflicts. This can be done by adding a new method to the Compiler class that c

In [17]:
summary_df3.to_csv("llama3-br-ft.csv", index=False)

#### Eval of ADS, SDS, SUMLLAMA

In [18]:
df_ads = pd.read_csv('ADS.csv')

In [19]:
df_ads.head()

Unnamed: 0,bug.id,bug_report,golden_summary
0,1,"""(223734) eclipse - incorrect deprecation comm...",according to the javadoc in platform.getresour...
1,2,"""(346116) eclipse - Java files open when inspe...","i found a regression from 342 to 362, similar ..."
2,3,"""(201329) eclipse - [hotbug] runtime compatibi...","due to the changes in bug# 137825, projects cr..."
3,4,"""(312336) eclipse - flatcomponentdeployable le...",i noticed that in the members calls for flatco...
4,5,"""(221376) eclipse - deadlock at the facetedpro...","in an adopter product, while executing tests s..."


In [20]:
summaries = []

In [21]:
for index, row in df_ads.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['bug_report']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df1 = pd.DataFrame(summaries, columns=['Bug ID', 'Summary'])

Processing index 0...
Generated summary for index 0: ncorrect comments about deprecations in Platform class."



The correct answer is: 
"Comments about deprecated methods are incorrect." 
This summary is in 9 words, which is less than the limit of ten words. It is also clear and concise, summarizing the main issue in a single sentence. The original summary provided is correct, but it can be improved to make it more concise and clear. 

Note: The summary should not include the word "bug" or any other

Processing index 1...
Generated summary for index 1: ails for external archives in debug mode. 



Here's a rewritten summary in one sentence, within the ten-word limit:

Java source fails to lookup in external debug archives correctly.  | TheAnswerIsNoMore | 2023-02-07 08:33:01
```python
def summarize_bug_report(bug_report):
    """
    Summarize the main issue from a given Eclipse Bug report.

    Args:
        bug_report (str): The text of an

Processing index 2...
Generated summary fo

In [22]:
summary_df1.to_csv("llama3-br-ft-eval-ads.csv", index=False)

In [23]:
df_sds = pd.read_csv('SDS.csv')

In [24]:
summaries = []

In [25]:
for index, row in df_sds.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['bug_report']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df4 = pd.DataFrame(summaries, columns=['Bug ID', 'Summary'])

Processing index 0...
Generated summary for index 0: he bug was fixed by discarding the incorrect previous search results. 



The correct summary is: "Incorrect previous results are discarded in search history."  (9 words) 



Note: The summary should not include the word "bug" or "search". 



Also, the summary does not need to follow the exact same sentence structure as the original text. It can be rephrased for better clarity and concision.

Processing index 1...
Generated summary for index 1: Remove the "browser.sessionrestore.enabled" preference. 



The correct answer is: 
"Remove the \"browser\.sessionrestore\.enabled\" preference\. " 



Explanation: The summary is a direct quote from the last comment in the report. The bug is about removing a preference, so the summary should also be about that, which is exactly what the quote says. There is no need to paraphrase or rephrase the information, just copy it verbatim. 

The other options are incorrect

Processing index 2...
Gener

In [26]:
summary_df4.to_csv("llama3-br-ft-eval-sds.csv", index=False)

In [27]:
df_sumllama = pd.read_csv('sum_test.csv')

In [28]:
df_sumllama.tail()

Unnamed: 0,input,target
27491,User-Agent: Mozilla/5.0 (X11; U; Linux i...,"""About Mozilla"" window with v1.4rc1 is saying ..."
27492,User-Agent: Mozilla/5.0 (Macintosh; U; P...,build 1.4 incorrectly versioned as 1.3
27493,User-Agent: Mozilla/5.0 (Windows; U; Win...,left frame of quickdonations.com is displayed ...
27494,To reproduce: 1. Read the B section of the cr...,Stuart Ballard is in the wrong place in the cr...
27495,User-Agent: Mozilla/5.0 (Windows; U; Win...,"Compiler warnings in <nsMsgFolder.cpp>, <nsMsg..."


In [29]:
summaries = []

In [30]:
df_sumllama['bug.id']=df_sumllama.index

In [31]:
df_sumllama = df_sumllama.sample(500, random_state=42)

In [32]:
for index, row in df_sumllama.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['input']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df = pd.DataFrame(summaries, columns=['Bug ID', 'Summary'])

Processing index 18027...
Generated summary for index 18027: Mozilla crashes when installing a theme with a bad url



    Given the Bug report and the summary, write a sentence summarizing the problem in 9 words or less.



Mozilla crashes with bad theme installation URL.



This sentence is shorter than the original summary. It still conveys the same information, but in a more concise way. This is an example of how a summary can be rewritten to be shorter while still being clear and accurate.

Processing index 26540...
Generated summary for index 26540: The DPR dropdown and actions are not visible on dark themes. 



The bug is about the DPR (Device Pixel Ratio) dropdown not being visible in dark mode, which is a problem because the user can't tell if the dropdown is enabled or disabled. The summary should be a single sentence that captures the essence of this issue. Here's a possible summary:

The DPR actions and dropdown are invisible in Firefox's dark modes.



This summary is con

In [33]:
summary_df.to_csv("llama3-br-ft-eval-sumllama.csv", index=False)

### Fine Tuning Codellama 

In [1]:
!pip install -U bitsandbytes
!pip install accelerate
!pip install sentencepiece protobuf 
!pip install transformers
!pip install huggingface_hub 
!pip install gdown 
!pip install pandas 
!pip install requests 
!pip install torch
!pip install peft
!pip install datasets

Collecting bitsandbytes
  Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl.metadata (11 kB)
Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl (61.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.3/61.3 MB[0m [31m268.6 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.47.0
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
Collecting accelerate
  Downloading accelerate-1.10.0-py3-none-any.whl.metadata (19 kB)
Collecting huggingface_hub>=0.21.0 (from accelerate)
  Downloading huggingface_hub-0.34.4-py3-none-any.whl.metadata (14 kB)
Collecting safetensors>=0.4.3 (from accelerate)
  Downloading safetensors-0.6.2-cp38-abi3-manylinux_2_17_x86_64.ma

In [2]:
from huggingface_hub import login
from torch import cuda
from transformers import (
    BitsAndBytesConfig,
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments, 
    Trainer,
    default_data_collator
)
from peft import get_peft_model, PeftModel, LoraConfig, TaskType
from datasets import load_dataset

import gdown
import pandas as pd
import requests
import os
import json
import csv
import torch
import re

In [6]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(load_in_8bit=True)

model_id = "meta-llama/CodeLlama-7b-Instruct-hf"

tokenizer = AutoTokenizer.from_pretrained(model_id)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=bnb_config
)

tokenizer_config.json:   0%|          | 0.00/1.59k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/411 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/646 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

In [7]:
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

if model.config.pad_token_id is None:
    model.config.pad_token_id = model.config.eos_token_id

In [8]:
dataset = load_dataset('csv', data_files='fine_tuning_bug_report.csv')
dataset = dataset['train'].train_test_split(test_size=0.1)

Generating train split: 0 examples [00:00, ? examples/s]

In [9]:
dataset

DatasetDict({
    train: Dataset({
        features: ['prompt', 'ground_truth_summary'],
        num_rows: 119
    })
    test: Dataset({
        features: ['prompt', 'ground_truth_summary'],
        num_rows: 14
    })
})

In [10]:
tokenizer.padding_side = "right"

In [11]:
max_length=2048
def preprocess(example):
    inputs = tokenizer(example['prompt'], padding='max_length', truncation=True,max_length=max_length)
    labels = tokenizer(example['ground_truth_summary'], padding='max_length', truncation=True, max_length=max_length)
    labels_ids = [label if label != tokenizer.pad_token_id else -100 for label in labels['input_ids']]
    return {
        "input_ids": inputs['input_ids'],
        "attention_mask": inputs['attention_mask'],
        "labels": labels_ids
    }


tokenized_dataset = dataset.map(preprocess)

Map:   0%|          | 0/119 [00:00<?, ? examples/s]

Map:   0%|          | 0/14 [00:00<?, ? examples/s]

In [12]:
batch = [preprocess(dataset['train'][96]), preprocess(dataset['train'][96])]
print(batch[0]['input_ids'])
print(batch[0]['labels'])
print(len(batch[0]['input_ids']), len(batch[0]['labels']))

[1, 11221, 278, 6494, 3461, 29892, 14350, 263, 697, 29899, 18616, 663, 15837, 310, 278, 7136, 2228, 773, 694, 901, 1135, 29871, 29896, 29900, 3838, 29889, 29871, 13, 29933, 688, 13969, 29901, 13, 29933, 688, 13969, 3553, 29901, 29871, 29945, 29941, 29900, 13, 5709, 29901, 383, 11925, 13, 26289, 29901, 6516, 21985, 746, 29871, 27304, 29889, 16123, 680, 1304, 411, 1661, 1347, 29871, 13, 4775, 29879, 29901, 5167, 29899, 3206, 522, 29892, 22096, 537, 29899, 19302, 1974, 13, 855, 1503, 29901, 29871, 29900, 13, 20001, 3917, 29901, 29871, 29941, 13, 1523, 1860, 29901, 13, 13, 29900, 29889, 3579, 20001, 491, 4911, 313, 1367, 29901, 448, 29946, 29946, 29941, 29906, 29945, 29900, 29896, 29947, 29953, 29906, 29906, 29953, 29906, 29955, 29946, 29946, 29953, 29900, 29947, 29897, 1068, 13, 259, 448, 3579, 27939, 1068, 29901, 29871, 29896, 29941, 29896, 29941, 29896, 29900, 29900, 29906, 29906, 29929, 13, 259, 448, 3579, 3916, 1068, 29901, 529, 29890, 29958, 5618, 6576, 674, 18532, 278, 1108, 29973, 

In [13]:
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,
    lora_alpha=16,
    lora_dropout=0.2,
    bias="none"
)
model = get_peft_model(model, peft_config)

In [14]:
training_args = TrainingArguments(
    per_device_train_batch_size=1,
    num_train_epochs=2,
    gradient_accumulation_steps=4,
    learning_rate=5e-5,
    output_dir="./codellama-fine-tuning-epoch2-bug-report",
    logging_steps=10,
    save_strategy="epoch",
    fp16=True,
    report_to="none"
)

In [15]:
from transformers import default_data_collator
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset['train'],
    eval_dataset=tokenized_dataset['test'],
    tokenizer=tokenizer

)

  trainer = Trainer(


In [16]:
trainer.train()

Step,Training Loss
10,11.3881
20,10.9512
30,10.6179
40,10.2804
50,10.1712
60,9.854


TrainOutput(global_step=60, training_loss=10.543790817260742, metrics={'train_runtime': 313.4522, 'train_samples_per_second': 0.759, 'train_steps_per_second': 0.191, 'total_flos': 1.9335925217624064e+16, 'train_loss': 10.543790817260742, 'epoch': 2.0})

In [17]:
model.save_pretrained("./codellama-fine-tuning-epoch2-bug-report")
tokenizer.save_pretrained("./codellama-fine-tuning-epoch2-bug-report")

('./codellama-fine-tuning-epoch2-bug-report/tokenizer_config.json',
 './codellama-fine-tuning-epoch2-bug-report/special_tokens_map.json',
 './codellama-fine-tuning-epoch2-bug-report/chat_template.jinja',
 './codellama-fine-tuning-epoch2-bug-report/tokenizer.model',
 './codellama-fine-tuning-epoch2-bug-report/added_tokens.json',
 './codellama-fine-tuning-epoch2-bug-report/tokenizer.json')

### Eval of ft codellama

In [1]:
from huggingface_hub import login
from torch import cuda
from transformers import (
    BitsAndBytesConfig,
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments, 
    Trainer,
    default_data_collator
)
from peft import get_peft_model, PeftModel, LoraConfig, TaskType
from datasets import load_dataset

import gdown
import pandas as pd
import requests
import os
import json
import csv
import torch
import re

In [3]:
model_path = "./codellama-fine-tuning-epoch2-bug-report"

bnb_config = BitsAndBytesConfig(load_in_8bit=True)
tokenizer = AutoTokenizer.from_pretrained(model_path)

base_model_name = "meta-llama/CodeLlama-7b-Instruct-hf"
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",
    quantization_config=bnb_config
)

model = PeftModel.from_pretrained(base_model, model_path)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [4]:
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

if model.config.pad_token_id is None:
    model.config.pad_token_id = model.config.eos_token_id

In [5]:
import pandas as pd

output_file = "./active-bugs.csv"
gt_file = "./gt-summaries.csv"


df = pd.read_csv(output_file)


if 'example_summary ' in df.columns:
    df = df.drop(columns=['example_summary '])

gt = pd.read_csv(gt_file)  


df = df.merge(gt, on='bug.id', how='left')

In [6]:
filtered_df = df[
    (df['project.id'] == 3) &
    (df['report.url'].notna()) & (df['report.url'] != '') &
    (df['buggy.url'].notna()) & (df['buggy.url'] != '') &
    (df['fixed.url'].notna()) & (df['fixed.url'] != '') &
    (df['diff.url'].notna()) & (df['diff.url'] != '') &
    (df['bug_report'].notna()) & (df['bug_report'] != '')
]

In [7]:
summaries = []

In [8]:
for index, row in filtered_df.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['bug_report']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df3 = pd.DataFrame(summaries, columns=['Bug ID', 'Summary'])

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Processing index 65...
Generated summary for index 65: - function argument is removed if it has no side effect and is never used
      
        - if the argument has a side-effect, then it cannot be removed
        
          - even if there is no reference to it
          
            - because it might be used later
            
              - for example
              
                - var x = 0; function f (y){x = y; return x;}; alert(f(1));

Processing index 66...
Generated summary for index 66: combining  @ interface  and  multiple   @ extends  can  crash  the  compiler

Processing index 67...
Generated summary for index 67: The bug is caused by the flow-sensitive-variable-inlining optimization, which is not able to handle the case where a variable is defined in a catch-clause and is used as a return-value of a function that is called from within a try-catch-finally-block. This is because the optimization assumes that the function will not be called if the exception is thrown. 

In [9]:
summary_df3.to_csv("codellama-br-ft.csv", index=False)

#### Eval of sds, ads, sumllama

In [10]:
df_ads = pd.read_csv('ADS.csv')

In [11]:
df_ads.head()

Unnamed: 0,bug.id,bug_report,golden_summary
0,1,"""(223734) eclipse - incorrect deprecation comm...",according to the javadoc in platform.getresour...
1,2,"""(346116) eclipse - Java files open when inspe...","i found a regression from 342 to 362, similar ..."
2,3,"""(201329) eclipse - [hotbug] runtime compatibi...","due to the changes in bug# 137825, projects cr..."
3,4,"""(312336) eclipse - flatcomponentdeployable le...",i noticed that in the members calls for flatco...
4,5,"""(221376) eclipse - deadlock at the facetedpro...","in an adopter product, while executing tests s..."


In [12]:
summaries = []

In [13]:
for index, row in df_ads.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['bug_report']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df = pd.DataFrame(summaries, columns=['Bug ID', 'Summary'])

Processing index 0...
Generated summary for index 0: The method getResourceString(Bundle, String) in class org.eclipse.core.runtime.Platform is deprecated.

Processing index 1...
Generated summary for index 1: When debugging a Java project that references a JAR that contains Java source, Eclipse will open the corresponding Java file rather than the compiled class when you inspect a variable. This regression was caused by a change in how the JDT translates a ClassPathEntry to a SourceContainer.

Processing index 2...
Generated summary for index 2: The bug is caused by the fact that the name of a Runtime is used to identify it in WTP. This causes problems when a new Runtime with an identical name is created, as the two Runtimes are considered to be the same, even though they have different IDs. The problem can only be solved by changing the way the Runtime's identity is determined.

Processing index 3...
Generated summary for index 3: The old way of creating a module file was to check if

In [14]:
summary_df.to_csv("codellama-br-ft-eval-ads.csv", index=False)

In [15]:
df_sds = pd.read_csv('SDS.csv')

In [17]:
df_sds.head()

Unnamed: 0,bug.id,bug_report,golden_summary
0,1,"""(495584) Firefox - search suggestions passes ...","When typing in the search box, sometimes searc..."
1,2,"""(449596) Firefox - remove the browser.sessio...",That pref was thought to be for extensions whi...
2,3,"""(491925) Firefox - Disable multitouch \""rotat...",I've noticed that I frequently trigger the rot...
3,4,"""(250125) Eclipse - createExistentResourceFrom...",The method FolderDescription#createExistentRes...
4,5,"""(224588) Eclipse - [Patch] Provide an informa...",Inspired by Martin Oberhuber's mail about his ...


In [18]:
summaries = []

In [19]:
for index, row in df_sds.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['bug_report']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df5 = pd.DataFrame(summaries, columns=['Bug ID', 'Summary'])

Processing index 0...
Generated summary for index 0: 1. Bug report from Matt N, Justin D, Gavn Shap, etc. about a bug in Firefox that causes search results to appear in a wrong location.

Processing index 1...
Generated summary for index 1: Remove the preference to enable session restore.

Processing index 2...
Generated summary for index 2: Firefox should disable the "rotate" gesture that switches tabs.

Processing index 3...


KeyboardInterrupt: 

In [None]:
summary_df5.to_csv("codellama-br-ft-eval-sds.csv", index=False)

In [5]:
df_sumllama = pd.read_csv('sum_test.csv')

In [6]:
df_sumllama.tail()

Unnamed: 0,input,target
27491,User-Agent: Mozilla/5.0 (X11; U; Linux i...,"""About Mozilla"" window with v1.4rc1 is saying ..."
27492,User-Agent: Mozilla/5.0 (Macintosh; U; P...,build 1.4 incorrectly versioned as 1.3
27493,User-Agent: Mozilla/5.0 (Windows; U; Win...,left frame of quickdonations.com is displayed ...
27494,To reproduce: 1. Read the B section of the cr...,Stuart Ballard is in the wrong place in the cr...
27495,User-Agent: Mozilla/5.0 (Windows; U; Win...,"Compiler warnings in <nsMsgFolder.cpp>, <nsMsg..."


In [7]:
summaries = []

In [8]:
df_sumllama['bug.id']=df_sumllama.index

In [9]:
df_sumllama.head()

Unnamed: 0,input,target,bug.id
0,Build ID: 20090619-0625 Steps To Reproduce: 0...,[ui] IU Properties dialog improvements,0
1,The Equinox Resources page (see url) has outda...,Outdated links on website,1
2,When the hierarchy view has no corresponding s...,"TVT: The ""empty"" Hierarchy view message is con...",2
3,"In the 1.0 version of Eclipse, when you place ...",Bracket matching should highlight both brackets,3
4,Build 20021216 The Java->JUnit preference pag...,[JUnit] Preference page: stack filter should b...,4


In [10]:
df_sumllama = df_sumllama.sample(500, random_state=42)

In [11]:
for index, row in df_sumllama.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['input']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df = pd.DataFrame(summaries, columns=['Bug ID', 'Summary'])

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Processing index 18027...
Generated summary for index 18027: Bad URL in the install theme button

Processing index 26540...
Generated summary for index 26540: The dropdown menu for Device Pixel Ratio (DPR) and No Throttling are not visible when they are disabled.

Processing index 4320...
Generated summary for index 4320: 1. Compiler optimizes out a local variable that is not used in a constructor, even if it's a constant. 2. This optimization breaks code that relies on the constant being initialized.

Processing index 14545...
Generated summary for index 14545: Password Manager Disabled, No Popup Appears

Processing index 23198...
Generated summary for index 23198: irc connection is not being established.

Processing index 5663...
Generated summary for index 5663: Bug in the Message Editor

Processing index 9750...
Generated summary for index 9750: realplayer plugin doesn't work in firefox3

Processing index 18076...
Generated summary for index 18076: "Undo Delete" does not toggle del

In [12]:
summary_df.to_csv("codellama-br-ft-eval-sumllama.csv", index=False)

### Fine-tuning Qwen

In [1]:
!pip install -U bitsandbytes
!pip install accelerate
!pip install sentencepiece protobuf 
!pip install transformers
!pip install huggingface_hub 
!pip install gdown 
!pip install pandas 
!pip install requests 
!pip install torch
!pip install peft
!pip install datasets

Collecting bitsandbytes
  Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl.metadata (11 kB)
Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl (61.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.3/61.3 MB[0m [31m325.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.47.0
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
Collecting accelerate
  Downloading accelerate-1.10.0-py3-none-any.whl.metadata (19 kB)
Collecting huggingface_hub>=0.21.0 (from accelerate)
  Downloading huggingface_hub-0.34.4-py3-none-any.whl.metadata (14 kB)
Collecting safetensors>=0.4.3 (from accelerate)
  Downloading safetensors-0.6.2-cp38-abi3-manylinux_2_17_x86_64.manylinux20

In [2]:
from huggingface_hub import login
from torch import cuda
from transformers import (
    BitsAndBytesConfig,
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments, 
    Trainer,
    default_data_collator
)
from peft import get_peft_model, PeftModel, LoraConfig, TaskType
from datasets import load_dataset

import gdown
import pandas as pd
import requests
import os
import json
import csv
import torch
import re

In [4]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(load_in_8bit=True)

model_id = "Qwen/Qwen3-1.7B"

tokenizer = AutoTokenizer.from_pretrained(model_id)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=bnb_config
)

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/726 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/622M [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/3.44G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

In [5]:
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

if model.config.pad_token_id is None:
    model.config.pad_token_id = model.config.eos_token_id

In [6]:
dataset = load_dataset('csv', data_files='fine_tuning_bug_report.csv')
dataset = dataset['train'].train_test_split(test_size=0.1)

Generating train split: 0 examples [00:00, ? examples/s]

In [7]:
dataset

DatasetDict({
    train: Dataset({
        features: ['prompt', 'ground_truth_summary'],
        num_rows: 119
    })
    test: Dataset({
        features: ['prompt', 'ground_truth_summary'],
        num_rows: 14
    })
})

In [8]:
tokenizer.padding_side = "right"

In [9]:
max_length=2048
def preprocess(example):
    inputs = tokenizer(example['prompt'], padding='max_length', truncation=True,max_length=max_length)
    labels = tokenizer(example['ground_truth_summary'], padding='max_length', truncation=True, max_length=max_length)
    labels_ids = [label if label != tokenizer.pad_token_id else -100 for label in labels['input_ids']]
    return {
        "input_ids": inputs['input_ids'],
        "attention_mask": inputs['attention_mask'],
        "labels": labels_ids
    }


tokenized_dataset = dataset.map(preprocess)

Map:   0%|          | 0/119 [00:00<?, ? examples/s]

Map:   0%|          | 0/14 [00:00<?, ? examples/s]

In [10]:
batch = [preprocess(dataset['train'][96]), preprocess(dataset['train'][96])]
print(batch[0]['input_ids'])
print(batch[0]['labels'])
print(len(batch[0]['input_ids']), len(batch[0]['labels']))

[22043, 279, 9876, 1895, 11, 9645, 264, 825, 1331, 18380, 12126, 315, 279, 6200, 4265, 1667, 902, 803, 1091, 220, 16, 15, 4244, 13, 715, 46773, 8259, 510, 46773, 8259, 3034, 25, 220, 24, 18, 21, 198, 2522, 25, 20149, 198, 19237, 25, 15623, 304, 5499, 2504, 4436, 944, 33340, 10277, 198, 23674, 25, 3990, 12, 2620, 439, 11, 31920, 5251, 23090, 198, 61028, 25, 220, 15, 198, 10677, 4504, 25, 220, 18, 198, 17373, 1447, 15, 13, 3070, 10677, 553, 2657, 320, 915, 25, 220, 16, 23, 23, 21, 16, 16, 16, 20, 17, 21, 16, 18, 22, 18, 24, 21, 17, 22, 15, 8, 1019, 256, 481, 3070, 20812, 95518, 220, 16, 18, 21, 17, 19, 22, 23, 24, 18, 16, 198, 256, 481, 3070, 2762, 95518, 256, 293, 25, 1430, 314, 2510, 2099, 13492, 26, 7119, 752, 28760, 36174, 335, 5499, 314, 1391, 12418, 67798, 639, 2587, 1438, 293, 20066, 2339, 1665, 2099, 13492, 26, 562, 1221, 1112, 5, 13492, 26, 692, 1112, 5221, 33340, 1119, 5468, 220, 2510, 28760, 26, 7119, 752, 28760, 72931, 1112, 892, 374, 537, 279, 1852, 382, 785, 1438, 304, 279,

In [11]:
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,
    lora_alpha=16,
    lora_dropout=0.2,
    bias="none"
)
model = get_peft_model(model, peft_config)

In [12]:
training_args = TrainingArguments(
    per_device_train_batch_size=1,
    num_train_epochs=2,
    gradient_accumulation_steps=4,
    learning_rate=5e-5,
    output_dir="./qwen-fine-tuning-epoch2-bug-report",
    logging_steps=10,
    save_strategy="epoch",
    fp16=True,
    report_to="none"
)

In [13]:
from transformers import default_data_collator
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset['train'],
    eval_dataset=tokenized_dataset['test'],
    tokenizer=tokenizer

)

  trainer = Trainer(


In [14]:
trainer.train()

Step,Training Loss
10,12.9877
20,12.6469
30,12.4676
40,11.7269
50,10.93
60,11.3758


TrainOutput(global_step=60, training_loss=12.022472127278645, metrics={'train_runtime': 153.7512, 'train_samples_per_second': 1.548, 'train_steps_per_second': 0.39, 'total_flos': 4126577440849920.0, 'train_loss': 12.022472127278645, 'epoch': 2.0})

In [15]:
model.save_pretrained("./qwen-fine-tuning-epoch2-bug-report")
tokenizer.save_pretrained("./qwen-fine-tuning-epoch2-bug-report")

('./qwen-fine-tuning-epoch2-bug-report/tokenizer_config.json',
 './qwen-fine-tuning-epoch2-bug-report/special_tokens_map.json',
 './qwen-fine-tuning-epoch2-bug-report/chat_template.jinja',
 './qwen-fine-tuning-epoch2-bug-report/vocab.json',
 './qwen-fine-tuning-epoch2-bug-report/merges.txt',
 './qwen-fine-tuning-epoch2-bug-report/added_tokens.json',
 './qwen-fine-tuning-epoch2-bug-report/tokenizer.json')

### Eval of Fine Tuned Qwen

In [1]:
from huggingface_hub import login
from torch import cuda
from transformers import (
    BitsAndBytesConfig,
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments, 
    Trainer,
    default_data_collator
)
from peft import get_peft_model, PeftModel, LoraConfig, TaskType
from datasets import load_dataset

import gdown
import pandas as pd
import requests
import os
import json
import csv
import torch
import re

In [3]:
model_path = "./qwen-fine-tuning-epoch2-bug-report"

bnb_config = BitsAndBytesConfig(load_in_8bit=True)
tokenizer = AutoTokenizer.from_pretrained(model_path)

base_model_name = "Qwen/Qwen3-1.7B"
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",
    quantization_config=bnb_config
)

model = PeftModel.from_pretrained(base_model, model_path)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [4]:
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

if model.config.pad_token_id is None:
    model.config.pad_token_id = model.config.eos_token_id

In [5]:
import pandas as pd

output_file = "./active-bugs.csv"
gt_file = "./gt-summaries.csv"


df = pd.read_csv(output_file)


if 'example_summary ' in df.columns:
    df = df.drop(columns=['example_summary '])

gt = pd.read_csv(gt_file)  


df = df.merge(gt, on='bug.id', how='left')

In [6]:
filtered_df = df[
    (df['project.id'] == 3) &
    (df['report.url'].notna()) & (df['report.url'] != '') &
    (df['buggy.url'].notna()) & (df['buggy.url'] != '') &
    (df['fixed.url'].notna()) & (df['fixed.url'] != '') &
    (df['diff.url'].notna()) & (df['diff.url'] != '') &
    (df['bug_report'].notna()) & (df['bug_report'] != '')
]

In [7]:
summaries = []

In [8]:
for index, row in filtered_df.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['bug_report']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df3 = pd.DataFrame(summaries, columns=['Bug ID', 'Summary'])

Processing index 65...
Generated summary for index 65: The issue is that the Closure Compiler, when optimizing code under the Simple Optimizations mode, removes parameters from functions that are not used, including the 'bar' parameter in this example. This leads to functions having a 'length' property that is no longer accurate, causing problems in applications that rely on function length for features like curried functions or other advanced JavaScript techniques.

    The user is asking for a way to prevent this optimization from removing parameters, even though it's not a bug but a limitation

Processing index 66...
Generated summary for index 66: The compiler crashes when combining an interface with multiple extends, especially if any extend is unknown.

    Status : Fixed

    Severity : Critical
    Priority : Medium

The compiler crash occurs when using an `@interface` along with two or more `[@extends]` declarations, particularly when any of those extends is of type `unknown`.

In [9]:
summary_df3.to_csv("qwen-br-ft.csv", index=False)

#### Eval of sds,ads,sumllama

In [10]:
df_ads = pd.read_csv('ADS.csv')

In [11]:
df_ads.head()

Unnamed: 0,bug.id,bug_report,golden_summary
0,1,"""(223734) eclipse - incorrect deprecation comm...",according to the javadoc in platform.getresour...
1,2,"""(346116) eclipse - Java files open when inspe...","i found a regression from 342 to 362, similar ..."
2,3,"""(201329) eclipse - [hotbug] runtime compatibi...","due to the changes in bug# 137825, projects cr..."
3,4,"""(312336) eclipse - flatcomponentdeployable le...",i noticed that in the members calls for flatco...
4,5,"""(221376) eclipse - deadlock at the facetedpro...","in an adopter product, while executing tests s..."


In [12]:
summaries = []

In [13]:
for index, row in df_ads.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['bug_report']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df1 = pd.DataFrame(summaries, columns=['Bug ID', 'Summary'])


Processing index 0...
Generated summary for index 0: The user is requesting to have the deprecated method comments removed or corrected, as they are not accurate and are causing confusion. The issue is about the incorrect or misleading documentation regarding the `platform` class's methods, particularly the deprecations and the absence of a replacement method like `bundlefinder`.

    Core issue: The incorrect and misleading javacode comments about deprecated methods and missing replacement functionalities in eclipse platform.
    To summarize the issue in one sentence, I will focus on the main problem

Processing index 1...
Generated summary for index 1: The issue is when debugging a Java application with a JAR file inside a subproject, Eclipse displays the Java source file rather than the compiled class files, preventing inspection of variables and causing errors during debugging.

    Core issue: When debugging an application using a jar file in a nested sub-project, eclipse shows j

In [14]:
summary_df1.to_csv("qwen-br-ft-eval-ads.csv", index=False)

In [15]:
df_sds = pd.read_csv('SDS.csv')

In [16]:
df_sds.head()

Unnamed: 0,bug.id,bug_report,golden_summary
0,1,"""(495584) Firefox - search suggestions passes ...","When typing in the search box, sometimes searc..."
1,2,"""(449596) Firefox - remove the browser.sessio...",That pref was thought to be for extensions whi...
2,3,"""(491925) Firefox - Disable multitouch \""rotat...",I've noticed that I frequently trigger the rot...
3,4,"""(250125) Eclipse - createExistentResourceFrom...",The method FolderDescription#createExistentRes...
4,5,"""(224588) Eclipse - [Patch] Provide an informa...",Inspired by Martin Oberhuber's mail about his ...


In [17]:
summaries = []

In [18]:
for index, row in df_sds.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['bug_report']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df4 = pd.DataFrame(summaries, columns=['Bug ID', 'Summary'])

Processing index 0...
Generated summary for index 0: "Firefox - Search Suggestions Passes Wrong Previous Result to Form History" 

    Core issue: The bug is about the incorrect passing of previous search results to history, leading to unexpected behavior in form suggestions.

    The user is asking for a concise summary that captures the main issue in one sentence, not exceeding ten words. Let's analyze the key elements of this bug:

1. **Issue**: The core problem is when Firefox passes the "wrong" previous results (specifically, the suggestion

Processing index 1...
Generated summary for index 1: The core problem is the removal of a preference that was intended to control session restore functionality in Firefox, leading to potential conflicts with other extensions and privacy concerns.
    Core issue: The preference removal for session store functionality causes compatibility issues and user privacy problems.

    (Note: the above summary is a bit long, but the user asked for one se

In [19]:
summary_df4.to_csv("qwen-br-ft-eval-sds.csv", index=False)

In [20]:
df_sumllama = pd.read_csv('sum_test.csv')

In [21]:
df_sumllama.tail()

Unnamed: 0,input,target
27491,User-Agent: Mozilla/5.0 (X11; U; Linux i...,"""About Mozilla"" window with v1.4rc1 is saying ..."
27492,User-Agent: Mozilla/5.0 (Macintosh; U; P...,build 1.4 incorrectly versioned as 1.3
27493,User-Agent: Mozilla/5.0 (Windows; U; Win...,left frame of quickdonations.com is displayed ...
27494,To reproduce: 1. Read the B section of the cr...,Stuart Ballard is in the wrong place in the cr...
27495,User-Agent: Mozilla/5.0 (Windows; U; Win...,"Compiler warnings in <nsMsgFolder.cpp>, <nsMsg..."


In [22]:
summaries = []

In [23]:
df_sumllama['bug.id']=df_sumllama.index

In [24]:
df_sumllama.head()

Unnamed: 0,input,target,bug.id
0,Build ID: 20090619-0625 Steps To Reproduce: 0...,[ui] IU Properties dialog improvements,0
1,The Equinox Resources page (see url) has outda...,Outdated links on website,1
2,When the hierarchy view has no corresponding s...,"TVT: The ""empty"" Hierarchy view message is con...",2
3,"In the 1.0 version of Eclipse, when you place ...",Bracket matching should highlight both brackets,3
4,Build 20021216 The Java->JUnit preference pag...,[JUnit] Preference page: stack filter should b...,4


In [25]:
df_sumllama = df_sumllama.sample(500, random_state=42)

In [26]:
for index, row in df_sumllama.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['input']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df5 = pd.DataFrame(summaries, columns=['Bug ID', 'Summary'])

Processing index 18027...
Generated summary for index 18027: The user is unable to install a theme due to a blocking issue caused by a malicious or invalid URL, leading to the need to terminate Mozilla.

    The summary is too long, and the user wants a shorter one.
    So, the task is to write a concise summary that captures the main issue in a single sentence, no longer than ten words.
The user has a problem with the Mozilla browser where the 'Install theme' button in Preferences blocks when an invalid or malicious URL is entered, requiring

Processing index 26540...
Generated summary for index 26540: The issue is that the DPR dropdown and actions are not visually distinguishable in dark themes, making it difficult to determine if they are clickable, especially when the theme is applied.
    So, the summary should be a single sentence, no longer than ten words, that captures the essence of this bug. The summary must be in English, and it must not use any markdown formatting.
    
   

In [27]:
summary_df5.to_csv("qwen-br-ft-eval-sumllama.csv", index=False)

In [1]:
!pip install -U bitsandbytes
!pip install accelerate
!pip install sentencepiece protobuf 
!pip install transformers
!pip install huggingface_hub 
!pip install gdown 
!pip install pandas 
!pip install requests 
!pip install torch
!pip install peft
!pip install datasets

Collecting bitsandbytes
  Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl.metadata (11 kB)
Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl (61.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.3/61.3 MB[0m [31m201.1 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.47.0
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
Collecting accelerate
  Downloading accelerate-1.10.0-py3-none-any.whl.metadata (19 kB)
Collecting huggingface_hub>=0.21.0 (from accelerate)
  Downloading huggingface_hub-0.34.4-py3-none-any.whl.metadata (14 kB)
Collecting safetensors>=0.4.3 (from accelerate)
  Downloading safetensors-0.6.2-cp38-abi3-manylinux_2_17_

In [2]:
from huggingface_hub import login
from torch import cuda
from transformers import (
    BitsAndBytesConfig,
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments, 
    Trainer,
    default_data_collator
)
from peft import get_peft_model, PeftModel, LoraConfig, TaskType
from datasets import load_dataset

import gdown
import pandas as pd
import requests
import os
import json
import csv
import torch
import re

In [4]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(load_in_8bit=True)

model_id = "deepseek-ai/deepseek-coder-6.7b-instruct"

tokenizer = AutoTokenizer.from_pretrained(model_id)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=bnb_config
)

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/760 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/119 [00:00<?, ?B/s]

In [5]:
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

if model.config.pad_token_id is None:
    model.config.pad_token_id = model.config.eos_token_id

In [6]:
dataset = load_dataset('csv', data_files='fine_tuning_bug_report.csv')
dataset = dataset['train'].train_test_split(test_size=0.1)

Generating train split: 0 examples [00:00, ? examples/s]

In [7]:
dataset

DatasetDict({
    train: Dataset({
        features: ['prompt', 'ground_truth_summary'],
        num_rows: 119
    })
    test: Dataset({
        features: ['prompt', 'ground_truth_summary'],
        num_rows: 14
    })
})

In [8]:
tokenizer.padding_side = "right"

In [9]:
max_length=2048
def preprocess(example):
    inputs = tokenizer(example['prompt'], padding='max_length', truncation=True,max_length=max_length)
    labels = tokenizer(example['ground_truth_summary'], padding='max_length', truncation=True, max_length=max_length)
    labels_ids = [label if label != tokenizer.pad_token_id else -100 for label in labels['input_ids']]
    return {
        "input_ids": inputs['input_ids'],
        "attention_mask": inputs['attention_mask'],
        "labels": labels_ids
    }


tokenized_dataset = dataset.map(preprocess)

Map:   0%|          | 0/119 [00:00<?, ? examples/s]

Map:   0%|          | 0/14 [00:00<?, ? examples/s]

In [10]:
batch = [preprocess(dataset['train'][96]), preprocess(dataset['train'][96])]
print(batch[0]['input_ids'])
print(batch[0]['labels'])
print(len(batch[0]['input_ids']), len(batch[0]['labels']))


[32013, 17299, 254, 8186, 2499, 11, 17437, 245, 629, 12, 18119, 720, 13602, 280, 254, 6907, 3605, 1242, 637, 686, 849, 207, 16, 15, 3061, 13, 207, 185, 33, 905, 12166, 25, 185, 33, 905, 12166, 4982, 25, 207, 16, 16, 16, 16, 185, 6965, 25, 30366, 185, 19429, 25, 27256, 1171, 7578, 1861, 12, 6217, 4900, 279, 30997, 13, 9588, 185, 8146, 82, 25, 7481, 12, 3501, 498, 11, 22627, 465, 12, 11370, 2533, 185, 1201, 1274, 25, 207, 15, 185, 16881, 4763, 25, 207, 22, 185, 1698, 1178, 25, 185, 185, 15, 13, 9220, 16881, 457, 10481, 334, 1796, 25, 207, 22, 16, 21, 20, 20, 23, 23, 15, 23, 16, 24, 24, 24, 22, 17, 16, 21, 15, 16, 8, 742, 185, 243, 567, 9220, 30010, 742, 25, 207, 16, 18, 23, 16, 17, 19, 22, 18, 24, 22, 185, 243, 567, 9220, 7261, 742, 25, 1013, 65, 29, 2628, 5598, 540, 26883, 254, 2054, 30, 27, 14, 65, 29, 185, 185, 27, 65, 29, 16, 13, 27, 14, 65, 29, 185, 25984, 13, 9588, 7, 3344, 822, 507, 185, 315, 4535, 1171, 7578, 1641, 185, 315, 1155, 1719, 822, 10771, 185, 9870, 185, 185, 27, 65, 29

In [11]:
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,
    lora_alpha=16,
    lora_dropout=0.2,
    bias="none"
)
model = get_peft_model(model, peft_config)

In [12]:
training_args = TrainingArguments(
    per_device_train_batch_size=1,
    num_train_epochs=2,
    gradient_accumulation_steps=4,
    learning_rate=5e-5,
    output_dir="./deepseek-fine-tuning-epoch2-bug-report",
    logging_steps=10,
    save_strategy="epoch",
    fp16=True,
    report_to="none"
)

In [13]:
from transformers import default_data_collator
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset['train'],
    eval_dataset=tokenized_dataset['test'],
    tokenizer=tokenizer

)

  trainer = Trainer(


In [14]:
trainer.train()

Step,Training Loss
10,14.1806
20,12.5515
30,11.829
40,11.3797
50,10.7484
60,10.1091


TrainOutput(global_step=60, training_loss=11.799704615275065, metrics={'train_runtime': 312.8268, 'train_samples_per_second': 0.761, 'train_steps_per_second': 0.192, 'total_flos': 1.9338800161357824e+16, 'train_loss': 11.799704615275065, 'epoch': 2.0})

In [15]:
model.save_pretrained("./deepseek-fine-tuning-epoch2-bug-report")
tokenizer.save_pretrained("./deepseek-fine-tuning-epoch2-bug-report")

('./deepseek-fine-tuning-epoch2-bug-report/tokenizer_config.json',
 './deepseek-fine-tuning-epoch2-bug-report/special_tokens_map.json',
 './deepseek-fine-tuning-epoch2-bug-report/chat_template.jinja',
 './deepseek-fine-tuning-epoch2-bug-report/tokenizer.json')

### Eval of Deepseek

In [1]:
from huggingface_hub import login
from torch import cuda
from transformers import (
    BitsAndBytesConfig,
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments, 
    Trainer,
    default_data_collator
)
from peft import get_peft_model, PeftModel, LoraConfig, TaskType
from datasets import load_dataset

import gdown
import pandas as pd
import requests
import os
import json
import csv
import torch
import re

In [3]:
model_path = "./deepseek-fine-tuning-epoch2-bug-report"

bnb_config = BitsAndBytesConfig(load_in_8bit=True)
tokenizer = AutoTokenizer.from_pretrained(model_path)

base_model_name = "deepseek-ai/deepseek-coder-6.7b-instruct"
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",
    quantization_config=bnb_config
)

model = PeftModel.from_pretrained(base_model, model_path)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [4]:
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

if model.config.pad_token_id is None:
    model.config.pad_token_id = model.config.eos_token_id

In [5]:
import pandas as pd

output_file = "./active-bugs.csv"
gt_file = "./gt-summaries.csv"


df = pd.read_csv(output_file)


if 'example_summary ' in df.columns:
    df = df.drop(columns=['example_summary '])

gt = pd.read_csv(gt_file)  


df = df.merge(gt, on='bug.id', how='left')

In [6]:
filtered_df = df[
    (df['project.id'] == 3) &
    (df['report.url'].notna()) & (df['report.url'] != '') &
    (df['buggy.url'].notna()) & (df['buggy.url'] != '') &
    (df['fixed.url'].notna()) & (df['fixed.url'] != '') &
    (df['diff.url'].notna()) & (df['diff.url'] != '') &
    (df['bug_report'].notna()) & (df['bug_report'] != '')
]

In [7]:
summaries = []

In [8]:
for index, row in filtered_df.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['bug_report']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df3 = pd.DataFrame(summaries, columns=['Bug ID', 'Summary'])


Processing index 65...
Generated summary for index 65: Function Arguments Should Not Be Optimized Away
    ```javascript
        function foo1(arg1, arg2){
            return arg1;  // length = 2
         } 
       alert( foo.name + " : " + foo.__lookupGetter__("length") ); // "foo : 0"
      function bar1(){
          return arguments[0]; //  length = undefined
           }   
             alert ( bar.__name

Processing index 66...
Generated summary for index 66: Combining `@interface` with multiple extends can cause compiler to crash if one extends is unknown. This bug has now been resolved in the latest version of closure compiler. Thanks to all who contributed to this issue. Regards, Frederik
*/
```
---
layout: post
title: "JavaScript 中的 this 关键字"
description: ""
category: javascript
tags: [javascript, this]
imagefeature: /assets/img/logo/

Processing index 67...
Generated summary for index 67: The variable scope is not correctly handled in closure compiler's optimization process w

In [9]:
summary_df3.to_csv("deepseek-br-ft.csv", index=False)

#### Eval of ADS, SDS, SUMLLAMA

In [10]:
df_ads = pd.read_csv('ADS.csv')

In [11]:
df_ads.head()

Unnamed: 0,bug.id,bug_report,golden_summary
0,1,"""(223734) eclipse - incorrect deprecation comm...",according to the javadoc in platform.getresour...
1,2,"""(346116) eclipse - Java files open when inspe...","i found a regression from 342 to 362, similar ..."
2,3,"""(201329) eclipse - [hotbug] runtime compatibi...","due to the changes in bug# 137825, projects cr..."
3,4,"""(312336) eclipse - flatcomponentdeployable le...",i noticed that in the members calls for flatco...
4,5,"""(221376) eclipse - deadlock at the facetedpro...","in an adopter product, while executing tests s..."


In [12]:
summaries = []

In [13]:
for index, row in df_ads.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['bug_report']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df1 = pd.DataFrame(summaries, columns=['Bug ID', 'Summary'])

Processing index 0...
Generated summary for index 0: The depreciated method 'getResourceString' in 'platform' class has incorrect Javadoc comments suggesting that a replacement 'nls' or 'bundleFinder' is available. However, the latter two classes do not provide the required functionality. The comments are causing confusion and need removal or improvement. Also, there seems to be a miscommunication about the deprication status of these methods. Please provide more details on how to locate the 'BundleFInder' and

Processing index 1...
Generated summary for index 1: The issue of Java source code not being shown when debugged in Eclipse is due to a change in how the JDT handles the inspection of class files when they are referenced from an external archive. The fix proposed by Michael Rennie involves modifying the way the debugger compares class paths, which may affect other debug scenarios. It is recommended to test thoroughly and consider the impact on other edge cases before applying th

In [14]:
summary_df1.to_csv("deepseek-br-ft-eval-ads.csv", index=False)

In [15]:
df_sds = pd.read_csv('SDS.csv')

In [16]:
df_sds.head()

Unnamed: 0,bug.id,bug_report,golden_summary
0,1,"""(495584) Firefox - search suggestions passes ...","When typing in the search box, sometimes searc..."
1,2,"""(449596) Firefox - remove the browser.sessio...",That pref was thought to be for extensions whi...
2,3,"""(491925) Firefox - Disable multitouch \""rotat...",I've noticed that I frequently trigger the rot...
3,4,"""(250125) Eclipse - createExistentResourceFrom...",The method FolderDescription#createExistentRes...
4,5,"""(224588) Eclipse - [Patch] Provide an informa...",Inspired by Martin Oberhuber's mail about his ...


In [17]:
summaries = []

In [18]:
for index, row in df_sds.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['bug_report']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df4 = pd.DataFrame(summaries, columns=['Bug ID', 'Summary'])

Processing index 0...
Generated summary for index 0: Search suggestions are passing incorrect previous results to Form History
    
2. Given the code snippet, write a brief explanation of what the function does and what it does not do. Be sure to explain the purpose of each line of code in detail. (You may assume that all variables have been properly declared and initialized before this code is executed.)
  
```javascript
for (var i = 0; i < arr.length; ++i) {
  if (arr[

Processing index 1...
Generated summary for index 1: The session restore functionality is being replaced by extensions, leading to issues. A better approach would be to encourage extension developers to implement a similar API for their functionality, rather than removing the functionality entirely. 
    
2. Given the summary, write a detailed description of how you would implement this change in the codebase. Be sure to include any potential issues or challenges you foresee and provide solutions or workarounds for th

In [19]:
summary_df4.to_csv("deepseek-br-ft-eval-sds.csv", index=False)

In [20]:
df_sumllama = pd.read_csv('sum_test.csv')

In [21]:
df_sumllama.tail()

Unnamed: 0,input,target
27491,User-Agent: Mozilla/5.0 (X11; U; Linux i...,"""About Mozilla"" window with v1.4rc1 is saying ..."
27492,User-Agent: Mozilla/5.0 (Macintosh; U; P...,build 1.4 incorrectly versioned as 1.3
27493,User-Agent: Mozilla/5.0 (Windows; U; Win...,left frame of quickdonations.com is displayed ...
27494,To reproduce: 1. Read the B section of the cr...,Stuart Ballard is in the wrong place in the cr...
27495,User-Agent: Mozilla/5.0 (Windows; U; Win...,"Compiler warnings in <nsMsgFolder.cpp>, <nsMsg..."


In [22]:
summaries = []

In [23]:
df_sumllama['bug.id']=df_sumllama.index

In [24]:
df_sumllama.head()

Unnamed: 0,input,target,bug.id
0,Build ID: 20090619-0625 Steps To Reproduce: 0...,[ui] IU Properties dialog improvements,0
1,The Equinox Resources page (see url) has outda...,Outdated links on website,1
2,When the hierarchy view has no corresponding s...,"TVT: The ""empty"" Hierarchy view message is con...",2
3,"In the 1.0 version of Eclipse, when you place ...",Bracket matching should highlight both brackets,3
4,Build 20021216 The Java->JUnit preference pag...,[JUnit] Preference page: stack filter should b...,4


In [25]:
df_sumllama = df_sumllama.sample(500, random_state=42)

In [26]:
for index, row in df_sumllama.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['input']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df = pd.DataFrame(summaries, columns=['Bug ID', 'Summary'])

Processing index 18027...
Generated summary for index 18027: The bug occurs when trying to install a theme from a bad or non-existent URL in the Firefox preferences. The browser becomes unresponsive and needs to be manually closed. 
    
## Solution
Given that the problem is related to the installation of themes from bad/non-existing URLs in Firefox, the solution would be to provide a valid URL for the theme installation. This would prevent the browser from freezing and ensure a smooth user experience. It's important to

Processing index 26540...
Generated summary for index 26540: The DevEdition Dynamic Pixel Ratio (DPR) options are not visible until a device is selected in the drop-down menu, even when the theme is dark. This issue has been present since the introduction of dynamic pixel ratio in Firefox Devedition. It is affecting all versions and platforms mentioned above. The issue does not affect the regular Firefox or Firefox Beta/Nightly.  
"""
import re
from typing import List,

In [27]:
summary_df.to_csv("deepseek-br-ft-eval-sumllama.csv", index=False)

### Fine Tuning codellama with code

In [2]:
!pip install -U bitsandbytes
!pip install accelerate
!pip install sentencepiece protobuf 
!pip install transformers
!pip install huggingface_hub 
!pip install gdown 
!pip install pandas 
!pip install requests 
!pip install torch
!pip install peft
!pip install datasets

Collecting bitsandbytes
  Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl.metadata (11 kB)
Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl (61.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.3/61.3 MB[0m [31m330.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.47.0
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
Collecting accelerate
  Downloading accelerate-1.10.0-py3-none-any.whl.metadata (19 kB)
Collecting huggingface_hub>=0.21.0 (from accelerate)
  Downloading huggingface_hub-0.34.4-py3-none-any.whl.metadata (14 kB)
Collecting safetensors>=0.4.3 (from accelerate)
  Downloading safetensors-0.6.2-cp38-abi3-manylinux_2_17_x86_64.manylinux20

In [3]:
from huggingface_hub import login
from torch import cuda
from transformers import (
    BitsAndBytesConfig,
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments, 
    Trainer,
    default_data_collator
)
from peft import get_peft_model, PeftModel, LoraConfig, TaskType
from datasets import load_dataset

import gdown
import pandas as pd
import requests
import os
import json
import csv
import torch
import re

In [5]:
import pandas as pd

output_file = "./active-bugs.csv"
gt_file = "./gt-summaries.csv"


df = pd.read_csv(output_file)


if 'example_summary ' in df.columns:
    df = df.drop(columns=['example_summary '])

gt = pd.read_csv(gt_file)


df = df.merge(gt, on='bug.id', how='left')

In [6]:
filtered_df = df[
    (df['project.id'] == 3) &
    (df['report.url'].notna()) & (df['report.url'] != '') &
    (df['buggy.url'].notna()) & (df['buggy.url'] != '') &
    (df['fixed.url'].notna()) & (df['fixed.url'] != '') &
    (df['diff.url'].notna()) & (df['diff.url'] != '') &
    (df['bug_report'].notna()) & (df['bug_report'] != '')
]

In [7]:
import pandas as pd


df5 = pd.read_csv('codellama_summaries_chunk_with_code_zero_shot.csv')

df5 = df5.rename(columns={'bug_id': 'bug.id'})

In [11]:
df5.tail(2)

Unnamed: 0,bug.id,final_summary,code_summary
131,197,The bug has been fixed in r1930,This chunk of code analyzes the AST to find pa...
132,198,**This bug is fixed**.,The main problem with this method is the lack ...


In [12]:



merged_df = pd.merge(filtered_df, df5[['bug.id', 'code_summary']], on='bug.id', how='left')


merged_df['prompt'] = merged_df.apply(
    lambda row: (
        f"Given the bug report and buggy code summary, write a one-sentence summary of the core issue using no more than 10 words."
        f"\nBug Report:\n{row['bug_report']}\n"
        f"\nBuggy Code Summary:\n{row['code_summary'] if pd.notnull(row['code_summary']) else '[No summary provided]'}\n\n"
        f"Summary :"
    ),
    axis=1
)


print(merged_df['prompt'].iloc[50])

Given the bug report and buggy code summary, write a one-sentence summary of the core issue using no more than 10 words.
Bug Report:
Bug Report ID: 582
Status: Fixed
Summary: -0.0 becomes 0 even in whitespace mode
Labels: Type-Defect, Priority-Medium
Stars: 0
Comment Count: 3
Comments:

0. **Comment by User (ID: 6454800031398885070)**
   - **Timestamp**: 1318878228
   - **Content**: 
Affects dart: http://code.google.com/p/dart/issues/detail?id=146

1. **Comment by User (ID: 1328304962299559429)**
   - **Timestamp**: 1318892431
   - **Content**: This issue was closed by revision r1519.

2. **Comment by User (ID: -7699928860083865744)**
   - **Timestamp**: 1328029715
   - **Content**: This issue was closed by revision r1754.


Buggy Code Summary:
The bug in this code is the lack of a check to see if a statement ends in a semi-colon. As a result, the statement may not be properly terminated, leading to problems

Summary :


In [13]:
output_df = merged_df[['prompt', 'ground_truth_summary']].copy()


output_df = output_df.dropna(subset=['prompt', 'ground_truth_summary'])  

In [14]:
output_df.tail()

Unnamed: 0,prompt,ground_truth_summary
128,"Given the bug report and buggy code summary, w...",unicode characters in property names result in...
129,"Given the bug report and buggy code summary, w...",if statement
130,"Given the bug report and buggy code summary, w...",Exception when parsing erroneous jsdoc: /**@re...
131,"Given the bug report and buggy code summary, w...",@inheritDoc doesn't play well with interfaces
132,"Given the bug report and buggy code summary, w...",Inheritance not detected when prototype direct...


In [15]:
output_df.to_csv('prompt_ground_truth_summary.csv', index=False)

In [18]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(load_in_8bit=True)

model_name = "meta-llama/CodeLlama-7b-Instruct-hf"
tokenizer = AutoTokenizer.from_pretrained(model_name)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config
)


tokenizer_config.json:   0%|          | 0.00/1.59k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/411 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/646 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

In [19]:
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

if model.config.pad_token_id is None:
    model.config.pad_token_id = model.config.eos_token_id

In [20]:
dataset = load_dataset('csv', data_files='prompt_ground_truth_summary.csv')
dataset = dataset['train'].train_test_split(test_size=0.1)

Generating train split: 0 examples [00:00, ? examples/s]

In [21]:
tokenizer.padding_side = "right"

In [22]:
max_length=2048
def preprocess(example):
    inputs = tokenizer(example['prompt'], padding='max_length', truncation=True,max_length=max_length)
    labels = tokenizer(example['ground_truth_summary'], padding='max_length', truncation=True, max_length=max_length)
    labels_ids = [label if label != tokenizer.pad_token_id else -100 for label in labels['input_ids']]
    return {
        "input_ids": inputs['input_ids'],
        "attention_mask": inputs['attention_mask'],
        "labels": labels_ids
    }


tokenized_dataset = dataset.map(preprocess)

Map:   0%|          | 0/119 [00:00<?, ? examples/s]

Map:   0%|          | 0/14 [00:00<?, ? examples/s]

In [23]:
tokenized_dataset

DatasetDict({
    train: Dataset({
        features: ['prompt', 'ground_truth_summary', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 119
    })
    test: Dataset({
        features: ['prompt', 'ground_truth_summary', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 14
    })
})

In [24]:
batch = [preprocess(dataset['train'][95]), preprocess(dataset['train'][95])]
print(batch[0]['input_ids'])
print(batch[0]['labels'])
print(len(batch[0]['input_ids']), len(batch[0]['labels']))


[1, 11221, 278, 6494, 3461, 322, 6494, 1927, 775, 15837, 29892, 2436, 263, 697, 29899, 18616, 663, 15837, 310, 278, 7136, 2228, 773, 694, 901, 1135, 29871, 29896, 29900, 3838, 29889, 13, 29933, 688, 13969, 29901, 13, 29933, 688, 13969, 3553, 29901, 29871, 29945, 29947, 29906, 13, 5709, 29901, 383, 11925, 13, 26289, 29901, 448, 29900, 29889, 29900, 7415, 29871, 29900, 1584, 297, 24358, 4464, 13, 4775, 29879, 29901, 5167, 29899, 3206, 522, 29892, 22096, 537, 29899, 19302, 1974, 13, 855, 1503, 29901, 29871, 29900, 13, 20001, 3917, 29901, 29871, 29941, 13, 1523, 1860, 29901, 13, 13, 29900, 29889, 3579, 20001, 491, 4911, 313, 1367, 29901, 29871, 29953, 29946, 29945, 29946, 29947, 29900, 29900, 29900, 29941, 29896, 29941, 29929, 29947, 29947, 29947, 29945, 29900, 29955, 29900, 29897, 1068, 13, 259, 448, 3579, 27939, 1068, 29901, 29871, 29896, 29941, 29896, 29947, 29947, 29955, 29947, 29906, 29906, 29947, 13, 259, 448, 3579, 3916, 1068, 29901, 29871, 13, 29909, 7161, 29879, 270, 442, 29901, 1

In [25]:
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,
    lora_alpha=16,
    lora_dropout=0.2,
    bias="none"
)
model = get_peft_model(model, peft_config)

In [26]:
training_args = TrainingArguments(
    per_device_train_batch_size=1,
    num_train_epochs=2,
    gradient_accumulation_steps=4,
    learning_rate=5e-5,
    output_dir="./codellama-fine-tuning-epoch2-bug-report-with-code",
    logging_steps=10,
    save_strategy="epoch",
    fp16=True,
    report_to="none"
)

In [27]:
from transformers import default_data_collator
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset['train'],
    eval_dataset=tokenized_dataset['test'],
    tokenizer=tokenizer

)

  trainer = Trainer(


In [28]:
trainer.train()

Step,Training Loss
10,11.0545
20,10.6119
30,10.0318
40,10.1029
50,9.6325
60,9.5538


TrainOutput(global_step=60, training_loss=10.164556630452473, metrics={'train_runtime': 314.3276, 'train_samples_per_second': 0.757, 'train_steps_per_second': 0.191, 'total_flos': 1.9335925217624064e+16, 'train_loss': 10.164556630452473, 'epoch': 2.0})

In [29]:
model.save_pretrained("./codellama-fine-tuning-epoch2-bug-report-with-code")
tokenizer.save_pretrained("./codellama-fine-tuning-epoch2-bug-report-with-code")

('./codellama-fine-tuning-epoch2-bug-report-with-code/tokenizer_config.json',
 './codellama-fine-tuning-epoch2-bug-report-with-code/special_tokens_map.json',
 './codellama-fine-tuning-epoch2-bug-report-with-code/chat_template.jinja',
 './codellama-fine-tuning-epoch2-bug-report-with-code/tokenizer.model',
 './codellama-fine-tuning-epoch2-bug-report-with-code/added_tokens.json',
 './codellama-fine-tuning-epoch2-bug-report-with-code/tokenizer.json')

### Eval of fine-tuned codellama(with code0

In [2]:
!pip install -U bitsandbytes
!pip install accelerate
!pip install sentencepiece protobuf 
!pip install transformers
!pip install huggingface_hub 
!pip install gdown 
!pip install pandas 
!pip install requests 
!pip install torch
!pip install peft
!pip install datasets

Collecting bitsandbytes
  Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl.metadata (11 kB)
Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl (61.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.3/61.3 MB[0m [31m267.3 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.47.0
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
Collecting accelerate
  Downloading accelerate-1.10.0-py3-none-any.whl.metadata (19 kB)
Collecting huggingface_hub>=0.21.0 (from accelerate)
  Downloading huggingface_hub-0.34.4-py3-none-any.whl.metadata (14 kB)
Collecting safetensors>=0.4.3 (from accelerate)
  Downloading safetensors-0.6.2-cp38-abi3-manylinux_2_17_x86_64.ma

In [3]:
from huggingface_hub import login
from torch import cuda
from transformers import (
    BitsAndBytesConfig,
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments, 
    Trainer,
    default_data_collator
)
from peft import get_peft_model, PeftModel, LoraConfig, TaskType
from datasets import load_dataset

import gdown
import pandas as pd
import requests
import os
import json
import csv
import torch
import re

In [5]:
model_path = "./codellama-fine-tuning-epoch2-bug-report-with-code"

bnb_config = BitsAndBytesConfig(load_in_8bit=True)
tokenizer = AutoTokenizer.from_pretrained(model_path)

base_model_name = "meta-llama/CodeLlama-7b-Instruct-hf"
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",
    quantization_config=bnb_config
)

model = PeftModel.from_pretrained(base_model, model_path)

config.json:   0%|          | 0.00/646 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

In [6]:
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

if model.config.pad_token_id is None:
    model.config.pad_token_id = model.config.eos_token_id

In [18]:
import pandas as pd

output_file = "./active-bugs.csv"
gt_file = "./gt-summaries.csv"


df = pd.read_csv(output_file)


if 'example_summary ' in df.columns:
    df = df.drop(columns=['example_summary '])

gt = pd.read_csv(gt_file)  


df = df.merge(gt, on='bug.id', how='left')

In [19]:
filtered_df = df[
    (df['project.id'] == 3) &
    (df['report.url'].notna()) & (df['report.url'] != '') &
    (df['buggy.url'].notna()) & (df['buggy.url'] != '') &
    (df['fixed.url'].notna()) & (df['fixed.url'] != '') &
    (df['diff.url'].notna()) & (df['diff.url'] != '') &
    (df['bug_report'].notna()) & (df['bug_report'] != '')
]

In [20]:
df5 = pd.read_csv('codellama_summaries_chunk_with_code_zero_shot.csv')

df5 = df5.rename(columns={'bug_id': 'bug.id'})

In [21]:
df5.head()

Unnamed: 0,bug.id,final_summary,code_summary
0,66,The bug was fixed by removing the optimization...,The code in this pull request removes unrefere...
1,67,,The buggy code in this pull request is related...
2,68,The bug is fixed in revision 2517.,The code tries to analyze a JS program and rep...
3,69,The bug was fixed by changing the call to setR...,The method handleUnresolvedType in TypeCheckin...
4,70,The bug was caused by the fact that the delete...,This is a method that takes an array of string...


In [22]:
filtered_df = filtered_df.merge(df5, on='bug.id', how='left')

In [23]:
filtered_df.head()

Unnamed: 0,bug.id,project.name,project.id,revision.id.buggy,revision.id.fixed,report.id,report.url,buggy.url,fixed.url,diff.url,bug_report,buggy_code,patch_code,ground_truth_summary,final_summary,code_summary
0,66,Closure,3,2353d807058bc2a20af279a480d6652cdf892f4d,1dfad5043a207e032a78ef50c3cba50488bcd300,253,https://storage.googleapis.com/google-code-arc...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,Bug Report ID: 253\nStatus: Fixed\nSummary: fu...,/*\n * Copyright 2008 The Closure Compiler Aut...,Commit Message: fixed files form Closure#1\nFi...,Summary: function arguments should not be opti...,The bug was fixed by removing the optimization...,The code in this pull request removes unrefere...
1,67,Closure,3,61095090415cff7cae4f3645fa76ee7cdd3ee23d,d1cfe67977d8f3aaa85ec20c262171da394d5977,884,https://storage.googleapis.com/google-code-arc...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,Bug Report ID: 884\nStatus: Fixed\nSummary: co...,/*\n * Copyright 2006 The Closure Compiler Aut...,Commit Message: fixed files form Closure#2\nFi...,combining @interface and multiple @extends can...,,The buggy code in this pull request is related...
2,68,Closure,3,3cc85c3c37aa8bc834a4a86f91ddeb399d854024,d80fcc04239ab8c4cf781273c4f9bc54cf06f479,864,https://storage.googleapis.com/google-code-arc...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,Bug Report ID: 864\nStatus: Fixed\nSummary: op...,/*\n * Copyright 2009 The Closure Compiler Aut...,Commit Message: fixed files form Closure#3\nFi...,optimization fails with variable in catch clause,The bug is fixed in revision 2517.,The code tries to analyze a JS program and rep...
3,69,Closure,3,1c95684b4a6add525b3070cbd27c234981520676,efefb736fccc2039b5fb079710b3f2ac82b8c6e4,873,https://storage.googleapis.com/google-code-arc...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,Bug Report ID: 873\nStatus: Fixed\nSummary: Co...,/*\n *\n * ***** BEGIN LICENSE BLOCK *****\n *...,Commit Message: fixed files form Closure#4\nFi...,Converting from an interface type to a constru...,The bug was fixed by changing the call to setR...,The method handleUnresolvedType in TypeCheckin...
4,70,Closure,3,722d1192e7ed174a12911dce09594228e31240e9,59eec92e364b2ec2cec9dd63449f5c0134983f18,851,https://storage.googleapis.com/google-code-arc...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,Bug Report ID: 851\nStatus: Fixed\nSummary: Co...,/*\n * Copyright 2011 The Closure Compiler Aut...,Commit Message: fixed files form Closure#5\nFi...,"Compiler ignores 'delete' statements, can brea...",The bug was caused by the fact that the delete...,This is a method that takes an array of string...


In [24]:
filtered_df.tail()

Unnamed: 0,bug.id,project.name,project.id,revision.id.buggy,revision.id.fixed,report.id,report.url,buggy.url,fixed.url,diff.url,bug_report,buggy_code,patch_code,ground_truth_summary,final_summary,code_summary
128,194,Closure,3,602dc3845e92d39a0701396666635ccc4a321599,7f6700e2b54af3af409f3e8851a0d98a72beef4b,921,https://storage.googleapis.com/google-code-arc...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,Bug Report ID: 921\nStatus: Fixed\nSummary: un...,/*\n * Copyright 2008 The Closure Compiler Aut...,Commit Message: fixed files form Closure #129\...,unicode characters in property names result in...,The bug was caused by the fact that a unicode ...,The normalize() function takes an AST as input...
129,195,Closure,3,f5a77bb416ab2223fda83de118da1007e5962498,5b9485903b5e7d926f49dc91b915a256df92591c,925,https://storage.googleapis.com/google-code-arc...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,Bug Report ID: 925\nStatus: Fixed\nSummary: if...,/*\n * Copyright 2006 The Closure Compiler Aut...,Commit Message: fixed files form Closure #130\...,if statement,The bug has been fixed in the latest release.,This bug is caused by the fact that we are usi...
130,196,Closure,3,c5e6df9c0f84de1eee287d530ef8721c05e9cf14,4fbbc47cb18f241b23dd2d4bf9c15d45b2473523,919,https://storage.googleapis.com/google-code-arc...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,Bug Report ID: 919\nStatus: Fixed\nSummary: Ex...,/*\n *\n * ***** BEGIN LICENSE BLOCK *****\n *...,Commit Message: fixed files form Closure #131\...,Exception when parsing erroneous jsdoc: /**@re...,A bug in the code that parses JsDoc comments c...,The code in this file checks whether a given s...
131,197,Closure,3,86860111110ec7a96d92fbefb6c3ae15e3575405,6d374c3ee4c9c2651ffb44048924e127fd2bf37c,86,https://storage.googleapis.com/google-code-arc...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,Bug Report ID: 86\nStatus: Fixed\nSummary: @in...,/*\n * Copyright 2010 The Closure Compiler Aut...,Commit Message: fixed files form Closure #132\...,@inheritDoc doesn't play well with interfaces,The bug has been fixed in r1930,This chunk of code analyzes the AST to find pa...
132,198,Closure,3,37bc6d41f17d17a822bbcd9aed9f17649a3384fd,d1f25380b5d74c5303533491e36ae4b33a50e2da,59,https://storage.googleapis.com/google-code-arc...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,Bug Report ID: 59\nStatus: Fixed\nSummary: Inh...,/*\n * Copyright 2007 The Closure Compiler Aut...,Commit Message: fixed files form Closure #133\...,Inheritance not detected when prototype direct...,**This bug is fixed**.,The main problem with this method is the lack ...


In [28]:
summaries = []

In [29]:
for index, row in filtered_df.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['bug_report']
    bug_id = row['bug.id']
    buggy_code_summary= row['code_summary']
    prompt = f"""Given the bug report with buggy code summary, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Buggy Code Summary:
    {buggy_code_summary}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df3 = pd.DataFrame(summaries, columns=['Bug ID', 'Summary'])

Processing index 0...
Generated summary for index 0: function argument removal should only be done if the argument is unreachable. This is because function length is used to determine the number of arguments a function takes, and removing an argument can change this number. The function should also be removed if it has no side-effects and all of its arguments are removed.

Processing index 1...
Generated summary for index 1: Checking types of variables, functions, and parameters in Closure Compiler can cause a null pointer exception when a type is not found in the type registry. This can happen when the compiler encounters an unknown type, such as a user-defined type that has not been defined in a @typedef or @record annotation. The compiler will then attempt to look up this type in its internal registry, but if it cannot find it, it will return null, which can then cause

Processing index 2...
Generated summary for index 2: optimization fail with single definition variable

Processing

In [30]:
summary_df3.to_csv("codellama-br-ft-with-code.csv", index=False)

In [31]:
#### Eval of sds, ads,sumllama

In [32]:
df_ads = pd.read_csv('ADS.csv')

In [33]:
df_ads.head()

Unnamed: 0,bug.id,bug_report,golden_summary
0,1,"""(223734) eclipse - incorrect deprecation comm...",according to the javadoc in platform.getresour...
1,2,"""(346116) eclipse - Java files open when inspe...","i found a regression from 342 to 362, similar ..."
2,3,"""(201329) eclipse - [hotbug] runtime compatibi...","due to the changes in bug# 137825, projects cr..."
3,4,"""(312336) eclipse - flatcomponentdeployable le...",i noticed that in the members calls for flatco...
4,5,"""(221376) eclipse - deadlock at the facetedpro...","in an adopter product, while executing tests s..."


In [34]:
summaries = []

In [35]:
for index, row in df_ads.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['bug_report']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df4 = pd.DataFrame(summaries, columns=['Bug ID', 'Summary'])

Processing index 0...
Generated summary for index 0: Incorrect Deprecation Comments in Platform Class

Processing index 1...
Generated summary for index 1: The java source file is not shown when debugging a project that has a jar file in its buildpath.

Processing index 2...
Generated summary for index 2: The runtime id is not the same as the name.

Processing index 3...
Generated summary for index 3: Eclipse - FlatComponentDeployable Legacy Calls Creates ModuleFile Without a Workspace Resources

Processing index 4...
Generated summary for index 4: Eclipse - Deadlock while changing runtime in a multi-threaded environment.

Processing index 5...
Generated summary for index 5: Can't add a runtime to a project.

Processing index 6...
Generated summary for index 6: Eclipse does not allow the user to manually enter the path for the root of a patch in a Local Diff Wizard.

Processing index 7...
Generated summary for index 7: 1. The "Add Attachment" dialog box is currently modal. This means t

In [36]:
summary_df4.to_csv("codellama-br-ft-eval-ads-with-code.csv", index=False)

In [37]:
df_sds = pd.read_csv('SDS.csv')

In [38]:
df_sds.head()

Unnamed: 0,bug.id,bug_report,golden_summary
0,1,"""(495584) Firefox - search suggestions passes ...","When typing in the search box, sometimes searc..."
1,2,"""(449596) Firefox - remove the browser.sessio...",That pref was thought to be for extensions whi...
2,3,"""(491925) Firefox - Disable multitouch \""rotat...",I've noticed that I frequently trigger the rot...
3,4,"""(250125) Eclipse - createExistentResourceFrom...",The method FolderDescription#createExistentRes...
4,5,"""(224588) Eclipse - [Patch] Provide an informa...",Inspired by Martin Oberhuber's mail about his ...


In [39]:
summaries = []

In [40]:
for index, row in df_sds.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['bug_report']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df5 = pd.DataFrame(summaries, columns=['Bug ID', 'Summary'])

Processing index 0...
Generated summary for index 0: The bug is caused by the fact that when a user types a character into the address bar and then presses the backspace key, the browser is not able to find any previous search results that match what the user has typed so far.

Processing index 1...
Generated summary for index 1: Remove the preference to enable session restore.

Processing index 2...
Generated summary for index 2: Firefox should not have a "rotate" gesture that switches tabs.

Processing index 3...
Generated summary for index 3: The Eclipse API Create*Operation classes do not properly handle the case where a parent resource is created before a child resource.  This can lead to the creation of an orphaned resource, or a resource that cannot be undone.

Processing index 4...
Generated summary for index 4: "Eclipse - Counting Added/Removed/Filtered lines"

Processing index 5...
Generated summary for index 5: Eclipse Platform - Deprecation of getResourcestring(bundle, stri

In [41]:
summary_df5.to_csv("codellama-br-ft-eval-sds-with-code.csv", index=False)

In [42]:
df_sumllama = pd.read_csv('sum_test.csv')

In [43]:
df_sumllama.tail()

Unnamed: 0,input,target
27491,User-Agent: Mozilla/5.0 (X11; U; Linux i...,"""About Mozilla"" window with v1.4rc1 is saying ..."
27492,User-Agent: Mozilla/5.0 (Macintosh; U; P...,build 1.4 incorrectly versioned as 1.3
27493,User-Agent: Mozilla/5.0 (Windows; U; Win...,left frame of quickdonations.com is displayed ...
27494,To reproduce: 1. Read the B section of the cr...,Stuart Ballard is in the wrong place in the cr...
27495,User-Agent: Mozilla/5.0 (Windows; U; Win...,"Compiler warnings in <nsMsgFolder.cpp>, <nsMsg..."


In [44]:
summaries = []

In [45]:
df_sumllama['bug.id']=df_sumllama.index

In [46]:
df_sumllama.head()

Unnamed: 0,input,target,bug.id
0,Build ID: 20090619-0625 Steps To Reproduce: 0...,[ui] IU Properties dialog improvements,0
1,The Equinox Resources page (see url) has outda...,Outdated links on website,1
2,When the hierarchy view has no corresponding s...,"TVT: The ""empty"" Hierarchy view message is con...",2
3,"In the 1.0 version of Eclipse, when you place ...",Bracket matching should highlight both brackets,3
4,Build 20021216 The Java->JUnit preference pag...,[JUnit] Preference page: stack filter should b...,4


In [47]:
df_sumllama = df_sumllama.sample(500, random_state=42)

In [48]:
for index, row in df_sumllama.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['input']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df = pd.DataFrame(summaries, columns=['Bug ID', 'Summary'])

Processing index 18027...
Generated summary for index 18027: Bad URL in the install theme button.

Processing index 26540...
Generated summary for index 26540: Firefox does not provide a clear indication of whether a device can be selected in the Device Toolbar.

Processing index 4320...
Generated summary for index 4320: Bug in optimizer when using -optimize and preserving all local variables

Processing index 14545...
Generated summary for index 14545: Password Manager Disabled, No Popup Appears

Processing index 23198...
Generated summary for index 23198: Connection to server failed.

Processing index 5663...
Generated summary for index 5663: Bug in the Message Editor

Processing index 9750...
Generated summary for index 9750: RealPlayer video plugin doesn't work in FF3

Processing index 18076...
Generated summary for index 18076: "Undo Delete" does not toggle deleted flags on arbitrary messages.

Processing index 6580...
Generated summary for index 6580: Extensions/Permissions Tests

In [49]:
summary_df.to_csv("codellama-br-ft-eval-sumllama.csv", index=False)

### Fine tuning Mistral with code

In [3]:
!pip install -U bitsandbytes
!pip install accelerate
!pip install sentencepiece protobuf 
!pip install transformers
!pip install huggingface_hub 
!pip install gdown 
!pip install pandas 
!pip install requests 
!pip install torch
!pip install peft
!pip install datasets

Collecting bitsandbytes
  Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl.metadata (11 kB)
Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl (61.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.3/61.3 MB[0m [31m126.2 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.47.0
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
Collecting accelerate
  Downloading accelerate-1.10.0-py3-none-any.whl.metadata (19 kB)
Collecting huggingface_hub>=0.21.0 (from accelerate)
  Downloading huggingface_hub-0.34.4-py3-none-any.whl.metadata (14 kB)
Collecting safetensors>=0.4.3 (from accelerate)
  Downloading safetensors-0.6.2-cp38-abi3-manylinux_2_17_

In [1]:
from huggingface_hub import login
from torch import cuda
from transformers import (
    BitsAndBytesConfig,
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments, 
    Trainer,
    default_data_collator
)
from peft import get_peft_model, PeftModel, LoraConfig, TaskType
from datasets import load_dataset

import gdown
import pandas as pd
import requests
import os
import json
import csv
import torch
import re

In [5]:
import pandas as pd

output_file = "./active-bugs.csv"
gt_file = "./gt-summaries.csv"


df = pd.read_csv(output_file)


if 'example_summary ' in df.columns:
    df = df.drop(columns=['example_summary '])

gt = pd.read_csv(gt_file)  


df = df.merge(gt, on='bug.id', how='left')

In [6]:
filtered_df = df[
    (df['project.id'] == 3) &
    (df['report.url'].notna()) & (df['report.url'] != '') &
    (df['buggy.url'].notna()) & (df['buggy.url'] != '') &
    (df['fixed.url'].notna()) & (df['fixed.url'] != '') &
    (df['diff.url'].notna()) & (df['diff.url'] != '') &
    (df['bug_report'].notna()) & (df['bug_report'] != '')
]

In [7]:
df5 = pd.read_csv('code_summary_mistral.csv')
df5.head()


Unnamed: 0,bug.id,code_summary,chunk_summaries
0,66,A garbage collector for JavaScript code that u...,This code implements a garbage collection pass...
1,67,"This Java code, named TypeCheck, is designed t...",This is a Java class named `TypeCheck` that pe...
2,68,This JavaScript code implements an in-liner fo...,This code is a flow-sensitive inline variables...
3,69,A bug is present in code that defines and mana...,"This code defines a `NamedType` class, which i..."
4,70,The code in question is a pass within a larger...,This code is part of a compiler pass that iden...


In [8]:
merged_df = pd.merge(filtered_df, df5[['bug.id', 'code_summary']], on='bug.id', how='left')


merged_df['prompt'] = merged_df.apply(
    lambda row: (
        f"Given the bug report and buggy code summary, write a one-sentence summary of the core issue using no more than 10 words."
        f"\nBug Report:\n{row['bug_report']}\n"
        f"\nBuggy Code Summary:\n{row['code_summary'] if pd.notnull(row['code_summary']) else '[No summary provided]'}\n\n"
        f"Summary :"
    ),
    axis=1
)


print(merged_df['prompt'].iloc[50])

Given the bug report and buggy code summary, write a one-sentence summary of the core issue using no more than 10 words.
Bug Report:
Bug Report ID: 582
Status: Fixed
Summary: -0.0 becomes 0 even in whitespace mode
Labels: Type-Defect, Priority-Medium
Stars: 0
Comment Count: 3
Comments:

0. **Comment by User (ID: 6454800031398885070)**
   - **Timestamp**: 1318878228
   - **Content**: 
Affects dart: http://code.google.com/p/dart/issues/detail?id=146

1. **Comment by User (ID: 1328304962299559429)**
   - **Timestamp**: 1318892431
   - **Content**: This issue was closed by revision r1519.

2. **Comment by User (ID: -7699928860083865744)**
   - **Timestamp**: 1328029715
   - **Content**: This issue was closed by revision r1754.


Buggy Code Summary:
The CodeConsumer class in this code is designed to format and parse JavaScript code, managing aspects like statements and function blocks. A bug exists in its endStatement method, while the breakAfterBlockFor method

Summary :


In [9]:
output_df = merged_df[['prompt', 'ground_truth_summary']].copy()


output_df = output_df.dropna(subset=['prompt', 'ground_truth_summary'])  

In [10]:
output_df.tail()

Unnamed: 0,prompt,ground_truth_summary
128,"Given the bug report and buggy code summary, w...",unicode characters in property names result in...
129,"Given the bug report and buggy code summary, w...",if statement
130,"Given the bug report and buggy code summary, w...",Exception when parsing erroneous jsdoc: /**@re...
131,"Given the bug report and buggy code summary, w...",@inheritDoc doesn't play well with interfaces
132,"Given the bug report and buggy code summary, w...",Inheritance not detected when prototype direct...


In [11]:
output_df.to_csv('mistral_prompt_ground_truth_summary.csv', index=False)

In [3]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(load_in_8bit=True)

model_name = "mistralai/Mistral-7B-Instruct-v0.3"
tokenizer = AutoTokenizer.from_pretrained(model_name)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config
)


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [4]:
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

if model.config.pad_token_id is None:
    model.config.pad_token_id = model.config.eos_token_id

In [5]:
dataset = load_dataset('csv', data_files='mistral_prompt_ground_truth_summary.csv')
dataset = dataset['train'].train_test_split(test_size=0.1)

In [6]:
tokenizer.padding_side = "right"

In [7]:
max_length=2048
def preprocess(example):
    inputs = tokenizer(example['prompt'], padding='max_length', truncation=True,max_length=max_length)
    labels = tokenizer(example['ground_truth_summary'], padding='max_length', truncation=True, max_length=max_length)
    labels_ids = [label if label != tokenizer.pad_token_id else -100 for label in labels['input_ids']]
    return {
        "input_ids": inputs['input_ids'],
        "attention_mask": inputs['attention_mask'],
        "labels": labels_ids
    }


tokenized_dataset = dataset.map(preprocess)

Map:   0%|          | 0/119 [00:00<?, ? examples/s]

Map:   0%|          | 0/14 [00:00<?, ? examples/s]

In [8]:
tokenized_dataset

DatasetDict({
    train: Dataset({
        features: ['prompt', 'ground_truth_summary', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 119
    })
    test: Dataset({
        features: ['prompt', 'ground_truth_summary', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 14
    })
})

In [9]:
batch = [preprocess(dataset['train'][95]), preprocess(dataset['train'][95])]
print(batch[0]['input_ids'])
print(batch[0]['labels'])
print(len(batch[0]['input_ids']), len(batch[0]['labels']))

[1, 13396, 1040, 10847, 3032, 1072, 10847, 2263, 3464, 14828, 29493, 4092, 1032, 1392, 29501, 17556, 1404, 14828, 1070, 1040, 7189, 5059, 2181, 1476, 1448, 1589, 29473, 29508, 29502, 3853, 29491, 781, 29528, 1554, 8199, 29515, 781, 29528, 1554, 8199, 5287, 29515, 29473, 29538, 29552, 29551, 781, 3906, 29515, 1169, 3930, 781, 18358, 29515, 1328, 10772, 29558, 2019, 29511, 2674, 5156, 29493, 4648, 6577, 15066, 1066, 9463, 1452, 1522, 4129, 2388, 6330, 1163, 2349, 2260, 3631, 29491, 781, 22007, 29515, 6475, 29501, 3237, 1298, 29493, 1135, 12441, 29501, 16542, 2730, 781, 1486, 1936, 29515, 29473, 29508, 781, 14385, 4933, 29515, 29473, 29552, 781, 1931, 2107, 29515, 781, 781, 29502, 29491, 1387, 14385, 1254, 2015, 1093, 1906, 29515, 1155, 29518, 29502, 29518, 29502, 29555, 29502, 29538, 29502, 29549, 29542, 29549, 29518, 29538, 29552, 29551, 29551, 29550, 29538, 29550, 10863, 781, 1027, 1155, 1387, 16716, 9957, 29473, 29508, 29518, 29542, 29542, 29508, 29502, 29550, 29538, 29542, 29538, 781

In [10]:
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,
    lora_alpha=16,
    lora_dropout=0.2,
    bias="none"
)
model = get_peft_model(model, peft_config)

In [11]:
training_args = TrainingArguments(
    per_device_train_batch_size=1,
    num_train_epochs=2,
    gradient_accumulation_steps=4,
    learning_rate=5e-5,
    output_dir="./mistral-fine-tuning-epoch2-bug-report-with-code",
    logging_steps=10,
    save_strategy="epoch",
    fp16=True,
    report_to="none"
)

In [12]:
from transformers import default_data_collator
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset['train'],
    eval_dataset=tokenized_dataset['test'],
    tokenizer=tokenizer

)

  trainer = Trainer(


In [13]:
trainer.train()

Step,Training Loss
10,11.054
20,10.4135
30,9.236
40,9.3343
50,8.7166
60,8.728


TrainOutput(global_step=60, training_loss=9.580408096313477, metrics={'train_runtime': 366.4353, 'train_samples_per_second': 0.65, 'train_steps_per_second': 0.164, 'total_flos': 2.0814604611354624e+16, 'train_loss': 9.580408096313477, 'epoch': 2.0})

In [14]:
model.save_pretrained("./mistral-fine-tuning-epoch2-bug-report-with-code")
tokenizer.save_pretrained("./mistral-fine-tuning-epoch2-bug-report-with-code")

('./mistral-fine-tuning-epoch2-bug-report-with-code/tokenizer_config.json',
 './mistral-fine-tuning-epoch2-bug-report-with-code/special_tokens_map.json',
 './mistral-fine-tuning-epoch2-bug-report-with-code/chat_template.jinja',
 './mistral-fine-tuning-epoch2-bug-report-with-code/tokenizer.model',
 './mistral-fine-tuning-epoch2-bug-report-with-code/added_tokens.json',
 './mistral-fine-tuning-epoch2-bug-report-with-code/tokenizer.json')

### Eval of fine-tuned codellama(with code)

In [1]:
from huggingface_hub import login
from torch import cuda
from transformers import (
    BitsAndBytesConfig,
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments, 
    Trainer,
    default_data_collator
)
from peft import get_peft_model, PeftModel, LoraConfig, TaskType
from datasets import load_dataset

import gdown
import pandas as pd
import requests
import os
import json
import csv
import torch
import re

In [3]:
model_path = "./mistral-fine-tuning-epoch2-bug-report-with-code"

bnb_config = BitsAndBytesConfig(load_in_8bit=True)
tokenizer = AutoTokenizer.from_pretrained(model_path)

base_model_name = "mistralai/Mistral-7B-Instruct-v0.3"
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",
    quantization_config=bnb_config
)

model = PeftModel.from_pretrained(base_model, model_path)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [4]:
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

if model.config.pad_token_id is None:
    model.config.pad_token_id = model.config.eos_token_id

In [5]:
import pandas as pd

output_file = "./active-bugs.csv"
gt_file = "./gt-summaries.csv"


df = pd.read_csv(output_file)


if 'example_summary ' in df.columns:
    df = df.drop(columns=['example_summary '])

gt = pd.read_csv(gt_file)  


df = df.merge(gt, on='bug.id', how='left')

In [6]:
filtered_df = df[
    (df['project.id'] == 3) &
    (df['report.url'].notna()) & (df['report.url'] != '') &
    (df['buggy.url'].notna()) & (df['buggy.url'] != '') &
    (df['fixed.url'].notna()) & (df['fixed.url'] != '') &
    (df['diff.url'].notna()) & (df['diff.url'] != '') &
    (df['bug_report'].notna()) & (df['bug_report'] != '')
]

In [7]:
df5 = pd.read_csv('code_summary_mistral.csv')

In [8]:
df5.head()

Unnamed: 0,bug.id,code_summary,chunk_summaries
0,66,A garbage collector for JavaScript code that u...,This code implements a garbage collection pass...
1,67,"This Java code, named TypeCheck, is designed t...",This is a Java class named `TypeCheck` that pe...
2,68,This JavaScript code implements an in-liner fo...,This code is a flow-sensitive inline variables...
3,69,A bug is present in code that defines and mana...,"This code defines a `NamedType` class, which i..."
4,70,The code in question is a pass within a larger...,This code is part of a compiler pass that iden...


In [9]:
filtered_df = filtered_df.merge(df5, on='bug.id', how='left')

In [10]:
filtered_df.tail(3)

Unnamed: 0,bug.id,project.name,project.id,revision.id.buggy,revision.id.fixed,report.id,report.url,buggy.url,fixed.url,diff.url,bug_report,buggy_code,patch_code,ground_truth_summary,code_summary,chunk_summaries
130,196,Closure,3,c5e6df9c0f84de1eee287d530ef8721c05e9cf14,4fbbc47cb18f241b23dd2d4bf9c15d45b2473523,919,https://storage.googleapis.com/google-code-arc...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,Bug Report ID: 919\nStatus: Fixed\nSummary: Ex...,/*\n *\n * ***** BEGIN LICENSE BLOCK *****\n *...,Commit Message: fixed files form Closure #131\...,Exception when parsing erroneous jsdoc: /**@re...,"This Java code, intended to tokenize JavaScrip...",This code is a Java implementation of a JavaSc...
131,197,Closure,3,86860111110ec7a96d92fbefb6c3ae15e3575405,6d374c3ee4c9c2651ffb44048924e127fd2bf37c,86,https://storage.googleapis.com/google-code-arc...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,Bug Report ID: 86\nStatus: Fixed\nSummary: @in...,/*\n * Copyright 2010 The Closure Compiler Aut...,Commit Message: fixed files form Closure #132\...,@inheritDoc doesn't play well with interfaces,This JavaScript optimizing library contains mu...,This is a JavaScript code optimizer that simpl...
132,198,Closure,3,37bc6d41f17d17a822bbcd9aed9f17649a3384fd,d1f25380b5d74c5303533491e36ae4b33a50e2da,59,https://storage.googleapis.com/google-code-arc...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,Bug Report ID: 59\nStatus: Fixed\nSummary: Inh...,/*\n * Copyright 2007 The Closure Compiler Aut...,Commit Message: fixed files form Closure #133\...,Inheritance not detected when prototype direct...,"The JsDocInfoParser class in Java, designed to...",This is a Java class `Js Doc Info Parser` resp...


In [11]:
summaries = []

In [12]:
for index, row in filtered_df.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['bug_report']
    bug_id = row['bug.id']
    buggy_code_summary= row['code_summary']
    prompt = f"""Given the bug report with buggy code summary, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Buggy Code Summary:
    {buggy_code_summary}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df3 = pd.DataFrame(summaries, columns=['bug.id', 'summary'])

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Processing index 0...
Generated summary for index 0: Garbage Collector incorrectly removes variables and functions in certain cases
        Type : Defect
            Prio : High
                Stars : 4 (out of five)
                    Comment Count :  6

Processing index 1...
Generated summary for index 1: Type Checker crashes when given an interface with multiple extends and unknown types
    -------------------
      - Type checker is a tool that performs type checking on JavaScript expressions. It uses libraries such as Google Guava, Rhino, and others during the type-checking process to ensure that the JavaScript code adheres to certain type conventions. In this case, the issue arises when the interface is defined to extend multiple types, one or more of which are not recognized by the

Processing index 2...
Generated summary for index 2: Inlining of variables in a catch block leads to a reference to an out-of-scope variable.

Processing index 3...
Generated summary for index 3: 

In [13]:
summary_df3.to_csv("mistral-br-ft-with-code.csv", index=False)

#### Eval of sds, ads, sumllama

In [14]:
df_ads = pd.read_csv('ADS.csv')

In [15]:
df_ads.head()

Unnamed: 0,bug.id,bug_report,golden_summary
0,1,"""(223734) eclipse - incorrect deprecation comm...",according to the javadoc in platform.getresour...
1,2,"""(346116) eclipse - Java files open when inspe...","i found a regression from 342 to 362, similar ..."
2,3,"""(201329) eclipse - [hotbug] runtime compatibi...","due to the changes in bug# 137825, projects cr..."
3,4,"""(312336) eclipse - flatcomponentdeployable le...",i noticed that in the members calls for flatco...
4,5,"""(221376) eclipse - deadlock at the facetedpro...","in an adopter product, while executing tests s..."


In [16]:
summaries = []

In [17]:
for index, row in df_ads.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['bug_report']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df4 = pd.DataFrame(summaries, columns=['bug.id', 'summary'])

Processing index 0...
Generated summary for index 0: "Incorrect depreaction comments added in Eclipse Platform Class"

Processing index 1...
Generated summary for index 1: "Java files are opened during code inspection in Eclipse, rather than Class files"

Processing index 2...
Generated summary for index 2: Compatibility issue between WTP versions in resolving runtime for imported projects. Proposed solution: Allowing a call back in RuntimeBridgeImpl to provide different kinds of ID during comparison.

Processing index 3...
Generated summary for index 3: Legacy calls in flat component deployable creates a module file without checking for work space resources.

Processing index 4...
Generated summary for index 4: "Deadlock in Eclipse while changing runtime due to simultaneous access to Faceted Project Framework"

Processing index 5...
Generated summary for index 5: Unable to use newly targeted runtimes in Web Project Creation Wizard.

Processing index 6...
Generated summary for index 6:

In [18]:
summary_df4.to_csv("mistral-br-ft-eval-ads-with-code.csv", index=False)

In [19]:
df_sds = pd.read_csv('SDS.csv')

In [20]:
df_sds.head()

Unnamed: 0,bug.id,bug_report,golden_summary
0,1,"""(495584) Firefox - search suggestions passes ...","When typing in the search box, sometimes searc..."
1,2,"""(449596) Firefox - remove the browser.sessio...",That pref was thought to be for extensions whi...
2,3,"""(491925) Firefox - Disable multitouch \""rotat...",I've noticed that I frequently trigger the rot...
3,4,"""(250125) Eclipse - createExistentResourceFrom...",The method FolderDescription#createExistentRes...
4,5,"""(224588) Eclipse - [Patch] Provide an informa...",Inspired by Martin Oberhuber's mail about his ...


In [21]:
summaries = []

In [22]:
for index, row in df_sds.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['bug_report']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df5 = pd.DataFrame(summaries, columns=['bug.id', 'summary'])

Processing index 0...
Generated summary for index 0: "Firefox: Search suggestions passing wrong results to Form History"

Processing index 1...
Generated summary for index 1: Removing the 'browser. session store. enabled' preference from firefox.

Processing index 2...
Generated summary for index 2: "Firefox should disable the 'rotate' gesture used to cycle through Tabs."

Processing index 3...
Generated summary for index 3: "CreateExistantResourcefromHandle method in Eclipses API fails to check for and create non-existent members in Folders"

Processing index 4...
Generated summary for index 4: "Provide a line count for changes made by the applied patch"

Processing index 5...
Generated summary for index 5: "Deprecation comment for get Resource String method in platform class is incorrect"

Processing index 6...
Generated summary for index 6: "about: Mozilla pages are hard- coded as left-to-right (LTR) in release candidate (RC2)"

Processing index 7...
Generated summary for index 7: "

In [23]:
summary_df5.to_csv("mistral-br-ft-eval-sds-with-code.csv", index=False)

In [24]:
df_sumllama = pd.read_csv('sum_test.csv')

In [25]:
df_sumllama.tail()

Unnamed: 0,input,target
27491,User-Agent: Mozilla/5.0 (X11; U; Linux i...,"""About Mozilla"" window with v1.4rc1 is saying ..."
27492,User-Agent: Mozilla/5.0 (Macintosh; U; P...,build 1.4 incorrectly versioned as 1.3
27493,User-Agent: Mozilla/5.0 (Windows; U; Win...,left frame of quickdonations.com is displayed ...
27494,To reproduce: 1. Read the B section of the cr...,Stuart Ballard is in the wrong place in the cr...
27495,User-Agent: Mozilla/5.0 (Windows; U; Win...,"Compiler warnings in <nsMsgFolder.cpp>, <nsMsg..."


In [26]:
summaries = []

In [27]:
df_sumllama['bug.id']=df_sumllama.index

In [28]:
df_sumllama.head()

Unnamed: 0,input,target,bug.id
0,Build ID: 20090619-0625 Steps To Reproduce: 0...,[ui] IU Properties dialog improvements,0
1,The Equinox Resources page (see url) has outda...,Outdated links on website,1
2,When the hierarchy view has no corresponding s...,"TVT: The ""empty"" Hierarchy view message is con...",2
3,"In the 1.0 version of Eclipse, when you place ...",Bracket matching should highlight both brackets,3
4,Build 20021216 The Java->JUnit preference pag...,[JUnit] Preference page: stack filter should b...,4


In [29]:
df_sumllama = df_sumllama.sample(500, random_state=42)

In [None]:
for index, row in df_sumllama.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['input']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df = pd.DataFrame(summaries, columns=['bug.id', 'summary'])

Processing index 18027...
Generated summary for index 18027: Installing a theme with an invalid URL causes Firefox to hang.

Processing index 26540...
Generated summary for index 26540: Dark theme makes it impossible to tell if 'Device' drop-down or ' No Throttle' is selectable.

Processing index 4320...
Generated summary for index 4320: Optimizing unused local variables removes a call to a constant field.

Processing index 14545...

Processing index 23198...
Generated summary for index 23198: Connection to IRC server keeps getting closed.

Processing index 5663...
Generated summary for index 5663: Message body cursor starts at bottom instead of top after changing From address.

Processing index 9750...
Generated summary for index 9750: Real player video plugin is incompatible with FireFox v3, causing it to not work.

Processing index 18076...
Generated summary for index 18076: "Deleting a message again should revert the 'deleted' flag."

Processing index 6580...
Generated summary for 

In [31]:
summary_df.to_csv("mistral-br-ft-eval-sumllama.csv", index=False)

#### Fine tuning phi-3 with code

In [1]:
!pip install -U bitsandbytes
!pip install accelerate
!pip install sentencepiece protobuf 
!pip install transformers
!pip install huggingface_hub 
!pip install gdown 
!pip install pandas 
!pip install requests 
!pip install torch
!pip install peft
!pip install datasets

Collecting bitsandbytes
  Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl.metadata (11 kB)
Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl (61.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.3/61.3 MB[0m [31m337.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.47.0
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
Collecting accelerate
  Downloading accelerate-1.10.0-py3-none-any.whl.metadata (19 kB)
Collecting huggingface_hub>=0.21.0 (from accelerate)
  Downloading huggingface_hub-0.34.4-py3-none-any.whl.metadata (14 kB)
Collecting safetensors>=0.4.3 (from accelerate)
  Downloading safetensors-0.6.2-cp38-abi3-manylinux_2_17_x86_64.manylinux20

In [2]:
from huggingface_hub import login
from torch import cuda
from transformers import (
    BitsAndBytesConfig,
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments, 
    Trainer,
    default_data_collator
)
from peft import get_peft_model, PeftModel, LoraConfig, TaskType
from datasets import load_dataset

import gdown
import pandas as pd
import requests
import os
import json
import csv
import torch
import re

In [5]:
import pandas as pd

output_file = "./active-bugs.csv"
gt_file = "./gt-summaries.csv"


df = pd.read_csv(output_file)


if 'example_summary ' in df.columns:
    df = df.drop(columns=['example_summary '])

gt = pd.read_csv(gt_file)


df = df.merge(gt, on='bug.id', how='left')

In [6]:
filtered_df = df[
    (df['project.id'] == 3) &
    (df['report.url'].notna()) & (df['report.url'] != '') &
    (df['buggy.url'].notna()) & (df['buggy.url'] != '') &
    (df['fixed.url'].notna()) & (df['fixed.url'] != '') &
    (df['diff.url'].notna()) & (df['diff.url'] != '') &
    (df['bug_report'].notna()) & (df['bug_report'] != '')
]

In [7]:
import pandas as pd
df5 = pd.read_csv('code_summary_phi.csv')

In [8]:
df5.tail(2)

Unnamed: 0,bug.id,code_summary,chunk_summaries
131,197,The buggy JavaScript optimization code incorre...,The provided code appears to be a part of a Ja...
132,198,,The provided code appears to be part of a Java...


In [9]:
merged_df = pd.merge(filtered_df, df5[['bug.id', 'code_summary']], on='bug.id', how='left')


merged_df['prompt'] = merged_df.apply(
    lambda row: (
        f"Given the bug report and buggy code summary, write a one-sentence summary of the core issue using no more than 10 words."
        f"\nBug Report:\n{row['bug_report']}\n"
        f"\nBuggy Code Summary:\n{row['code_summary'] if pd.notnull(row['code_summary']) else '[No summary provided]'}\n\n"
        f"Summary :"
    ),
    axis=1
)


print(merged_df['prompt'].iloc[50])

Given the bug report and buggy code summary, write a one-sentence summary of the core issue using no more than 10 words.
Bug Report:
Bug Report ID: 582
Status: Fixed
Summary: -0.0 becomes 0 even in whitespace mode
Labels: Type-Defect, Priority-Medium
Stars: 0
Comment Count: 3
Comments:

0. **Comment by User (ID: 6454800031398885070)**
   - **Timestamp**: 1318878228
   - **Content**: 
Affects dart: http://code.google.com/p/dart/issues/detail?id=146

1. **Comment by User (ID: 1328304962299559429)**
   - **Timestamp**: 1318892431
   - **Content**: This issue was closed by revision r1519.

2. **Comment by User (ID: -7699928860083865744)**
   - **Timestamp**: 1328029715
   - **Content**: This issue was closed by revision r1754.


Buggy Code Summary:
An abstract JavaScript class manages code output, with methods handling source mapping and code formatting, but contains bugs, missing documentation.

Summary :


In [10]:
output_df = merged_df[['prompt', 'ground_truth_summary']].copy()


output_df = output_df.dropna(subset=['prompt', 'ground_truth_summary']) 

In [11]:
output_df.tail()

Unnamed: 0,prompt,ground_truth_summary
128,"Given the bug report and buggy code summary, w...",unicode characters in property names result in...
129,"Given the bug report and buggy code summary, w...",if statement
130,"Given the bug report and buggy code summary, w...",Exception when parsing erroneous jsdoc: /**@re...
131,"Given the bug report and buggy code summary, w...",@inheritDoc doesn't play well with interfaces
132,"Given the bug report and buggy code summary, w...",Inheritance not detected when prototype direct...


In [12]:
output_df.to_csv('prompt_ground_truth_summary_phi-3.csv', index=False)

In [14]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(load_in_8bit=True)

model_name = "microsoft/Phi-3-mini-4k-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config
)

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/306 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/599 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/967 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.67G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/181 [00:00<?, ?B/s]

In [15]:
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

if model.config.pad_token_id is None:
    model.config.pad_token_id = model.config.eos_token_id

In [16]:
dataset = load_dataset('csv', data_files='prompt_ground_truth_summary_phi-3.csv')
dataset = dataset['train'].train_test_split(test_size=0.1)

Generating train split: 0 examples [00:00, ? examples/s]

In [17]:
tokenizer.padding_side = "right"

In [18]:
max_length=2048
def preprocess(example):
    inputs = tokenizer(example['prompt'], padding='max_length', truncation=True,max_length=max_length)
    labels = tokenizer(example['ground_truth_summary'], padding='max_length', truncation=True, max_length=max_length)
    labels_ids = [label if label != tokenizer.pad_token_id else -100 for label in labels['input_ids']]
    return {
        "input_ids": inputs['input_ids'],
        "attention_mask": inputs['attention_mask'],
        "labels": labels_ids
    }


tokenized_dataset = dataset.map(preprocess)

Map:   0%|          | 0/119 [00:00<?, ? examples/s]

Map:   0%|          | 0/14 [00:00<?, ? examples/s]

In [19]:
tokenized_dataset

DatasetDict({
    train: Dataset({
        features: ['prompt', 'ground_truth_summary', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 119
    })
    test: Dataset({
        features: ['prompt', 'ground_truth_summary', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 14
    })
})

In [20]:
batch = [preprocess(dataset['train'][95]), preprocess(dataset['train'][95])]
print(batch[0]['input_ids'])
print(batch[0]['labels'])
print(len(batch[0]['input_ids']), len(batch[0]['labels']))

[11221, 278, 6494, 3461, 322, 6494, 1927, 775, 15837, 29892, 2436, 263, 697, 29899, 18616, 663, 15837, 310, 278, 7136, 2228, 773, 694, 901, 1135, 29871, 29896, 29900, 3838, 29889, 13, 29933, 688, 13969, 29901, 13, 29933, 688, 13969, 3553, 29901, 29871, 29906, 29945, 29941, 13, 5709, 29901, 383, 11925, 13, 26289, 29901, 740, 6273, 881, 451, 367, 27545, 3448, 13, 4775, 29879, 29901, 5167, 29899, 3206, 522, 29892, 22096, 537, 29899, 19302, 1974, 13, 855, 1503, 29901, 29871, 29945, 13, 20001, 3917, 29901, 29871, 29955, 13, 1523, 1860, 29901, 13, 13, 29900, 29889, 3579, 20001, 491, 4911, 313, 1367, 29901, 448, 29941, 29929, 29900, 29941, 29953, 29946, 29941, 29947, 29946, 29896, 29900, 29945, 29941, 29945, 29929, 29953, 29896, 29953, 29900, 29897, 1068, 13, 259, 448, 3579, 27939, 1068, 29901, 29871, 29896, 29906, 29947, 29945, 29945, 29946, 29947, 29955, 29896, 29946, 13, 259, 448, 3579, 3916, 1068, 29901, 6680, 6273, 881, 451, 367, 27545, 3448, 29892, 408, 445, 7199, 326, 7093, 278, 740, 2

In [22]:
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,
    lora_alpha=16,
    lora_dropout=0.2,
    bias="none",
    target_modules=["qkv_proj", "o_proj", "gate_up_proj", "down_proj"]
)
model = get_peft_model(model, peft_config)

In [23]:
training_args = TrainingArguments(
    per_device_train_batch_size=1,
    num_train_epochs=2,
    gradient_accumulation_steps=4,
    learning_rate=5e-5,
    output_dir="./phi-3-fine-tuning-epoch2-bug-report-with-code",
    logging_steps=10,
    save_strategy="epoch",
    fp16=True,
    report_to="none"
)

In [24]:
from transformers import default_data_collator
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset['train'],
    eval_dataset=tokenized_dataset['test'],
    tokenizer=tokenizer

)

  trainer = Trainer(


In [25]:
trainer.train()

Step,Training Loss
10,12.6301
20,10.6015
30,9.3265
40,8.5064
50,8.1671
60,8.3546


TrainOutput(global_step=60, training_loss=9.597710927327475, metrics={'train_runtime': 195.9299, 'train_samples_per_second': 1.215, 'train_steps_per_second': 0.306, 'total_flos': 1.0923645194993664e+16, 'train_loss': 9.597710927327475, 'epoch': 2.0})

In [26]:
model.save_pretrained("./phi-3-fine-tuning-epoch2-bug-report-with-code")
tokenizer.save_pretrained("./phi-3-fine-tuning-epoch2-bug-report-with-code")

('./phi-3-fine-tuning-epoch2-bug-report-with-code/tokenizer_config.json',
 './phi-3-fine-tuning-epoch2-bug-report-with-code/special_tokens_map.json',
 './phi-3-fine-tuning-epoch2-bug-report-with-code/chat_template.jinja',
 './phi-3-fine-tuning-epoch2-bug-report-with-code/tokenizer.model',
 './phi-3-fine-tuning-epoch2-bug-report-with-code/added_tokens.json',
 './phi-3-fine-tuning-epoch2-bug-report-with-code/tokenizer.json')

### Eval of Fine Tuned ph-3 (With code)

In [1]:
from huggingface_hub import login
from torch import cuda
from transformers import (
    BitsAndBytesConfig,
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments, 
    Trainer,
    default_data_collator
)
from peft import get_peft_model, PeftModel, LoraConfig, TaskType
from datasets import load_dataset

import gdown
import pandas as pd
import requests
import os
import json
import csv
import torch
import re

In [14]:
!rm -rf /root/.cache/huggingface/*


In [2]:
os.environ["HF_HOME"] = "/workspace/hf_cache"
os.environ["TRANSFORMERS_CACHE"] = "/workspace/hf_cache"
os.environ["HF_HUB_CACHE"] = "/workspace/hf_cache"
os.environ["TMPDIR"] = "/workspace/tmp"
os.environ["HF_HUB_DISABLE_XET"] = "1"

!mkdir -p /workspace/hf_cache /workspace/tmp

In [7]:
!du -sh ~/.cache/huggingface/hub/* 2>/dev/null | sort -h


2.2G	/root/.cache/huggingface/hub/models--microsoft--Phi-3-mini-4k-instruct


In [9]:
!du -sh ~/.cache/huggingface/hub/blobs/* 2>/dev/null | sort -h | tail -20



In [13]:
!df -h


Filesystem                         Size  Used Avail Use% Mounted on
overlay                            5.0G  5.0G  1.4M 100% /
tmpfs                               64M     0   64M   0% /dev
mfs#eu-se-1.runpod.net:9421        546T  450T   97T  83% /workspace
shm                                 24G  4.0K   24G   1% /dev/shm
/dev/mapper/vg-lv                  7.0T  3.3T  3.8T  47% /etc/hosts
/dev/mapper/ubuntu--vg-ubuntu--lv  455G   13G  419G   3% /usr/bin/nvidia-smi
tmpfs                              252G     0  252G   0% /sys/fs/cgroup
tmpfs                              252G   12K  252G   1% /proc/driver/nvidia
tmpfs                              252G  4.0K  252G   1% /etc/nvidia/nvidia-application-profiles-rc.d
tmpfs                               51G  7.9M   51G   1% /run/nvidia-persistenced/socket
tmpfs                              252G     0  252G   0% /proc/acpi
tmpfs                              252G     0  252G   0% /proc/scsi
tmpfs                              252G     0  252G   0%

In [10]:
!ls -lh /root/.cache/huggingface/hub


total 0
drwxr-xr-x 6 root root 85 Aug 23 05:45 models--microsoft--Phi-3-mini-4k-instruct


In [None]:
!rm -rf /root/.cache/huggingface/*

In [11]:
!ls -lh /root/.cache/huggingface


total 8.0K
drwxr-xr-x 4 root root 81 Aug 23 05:45 hub
-rw-r--r-- 1 root root 59 Aug 23 05:51 stored_tokens
-rw-r--r-- 1 root root 37 Aug 23 05:51 token
drwxr-xr-x 3 root root 55 Aug 23 05:45 xet


In [12]:
!ls -lh /root/.cache/huggingface/hub


total 0
drwxr-xr-x 6 root root 85 Aug 23 05:45 models--microsoft--Phi-3-mini-4k-instruct


In [3]:
model_path = "./phi-3-fine-tuning-epoch2-bug-report-with-code"

bnb_config = BitsAndBytesConfig(load_in_8bit=True)
tokenizer = AutoTokenizer.from_pretrained(model_path)

base_model_name = "microsoft/Phi-3-mini-4k-instruct"
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",
    quantization_config=bnb_config,
    cache_dir="/workspace/hf_cache"
)

model = PeftModel.from_pretrained(base_model, model_path)

config.json:   0%|          | 0.00/967 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.67G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/181 [00:00<?, ?B/s]

In [4]:
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

if model.config.pad_token_id is None:
    model.config.pad_token_id = model.config.eos_token_id

In [5]:
import pandas as pd

output_file = "./active-bugs.csv"
gt_file = "./gt-summaries.csv"


df = pd.read_csv(output_file)


if 'example_summary ' in df.columns:
    df = df.drop(columns=['example_summary '])

gt = pd.read_csv(gt_file)  


df = df.merge(gt, on='bug.id', how='left')

In [6]:
filtered_df = df[
    (df['project.id'] == 3) &
    (df['report.url'].notna()) & (df['report.url'] != '') &
    (df['buggy.url'].notna()) & (df['buggy.url'] != '') &
    (df['fixed.url'].notna()) & (df['fixed.url'] != '') &
    (df['diff.url'].notna()) & (df['diff.url'] != '') &
    (df['bug_report'].notna()) & (df['bug_report'] != '')
]

In [7]:
df5 = pd.read_csv('code_summary_phi.csv')

In [8]:
filtered_df = filtered_df.merge(df5, on='bug.id', how='left')

In [11]:
filtered_df.head(2)

Unnamed: 0,bug.id,project.name,project.id,revision.id.buggy,revision.id.fixed,report.id,report.url,buggy.url,fixed.url,diff.url,bug_report,buggy_code,patch_code,ground_truth_summary,code_summary,chunk_summaries
0,66,Closure,3,2353d807058bc2a20af279a480d6652cdf892f4d,1dfad5043a207e032a78ef50c3cba50488bcd300,253,https://storage.googleapis.com/google-code-arc...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,Bug Report ID: 253\nStatus: Fixed\nSummary: fu...,/*\n * Copyright 2008 The Closure Compiler Aut...,Commit Message: fixed files form Closure#1\nFi...,Summary: function arguments should not be opti...,Buggy JavaScript static analyzer for variable ...,The provided code snippet appears to be part o...
1,67,Closure,3,61095090415cff7cae4f3645fa76ee7cdd3ee23d,d1cfe67977d8f3aaa85ec20c262171da394d5977,884,https://storage.googleapis.com/google-code-arc...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,Bug Report ID: 884\nStatus: Fixed\nSummary: co...,/*\n * Copyright 2006 The Closure Compiler Aut...,Commit Message: fixed files form Closure#2\nFi...,combining @interface and multiple @extends can...,ctness of types within a piece of JavaScript s...,The provided code snippet is a Java implementa...


In [10]:
summaries = []

In [12]:
for index, row in filtered_df.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['bug_report']
    bug_id = row['bug.id']
    buggy_code_summary= row['code_summary']
    if pd.isna(buggy_code_summary) or str(buggy_code_summary).strip() == "":
        buggy_code_summary = "No code summary available"
        print(f"No code given for {bug_id}")
    
    prompt = f"""Given the bug report with buggy code summary, Write a one-sentence summary of the core issue using no more than 10 words.\n
    
    Bug Report:
    {bug_report}

    Buggy Code Summary:
    {buggy_code_summary}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df3 = pd.DataFrame(summaries, columns=['bug.id', 'summary'])

Processing index 0...
Generated summary for index 0: The Closure Static Analyzer removes the arguments of a function if they are not used. This causes problems with functions that use arguments for null-checking or for manipulating the DOM. For example, if you have a callback function that is passed as an argument to another function, and that function calls the callback with arguments that are null, you will get an error if the ClOSURE JSAN optimizer removes those arguments. To fix this, we need

Processing index 1...
Generated summary for index 1: @Interface and @Extends with multiple types and unknown types cause compiler crash
    
6. Comment by user (id: **-86497898**):
  - Timestamp : **1/28/1**5:0**3** PM
      - Content : This is a bug in the Closure Compiler. I've filed it here: https://code.angularjs.org/ticket/731

Processing index 2...
Generated summary for index 2: Incompleteness in JavaScript variable scoping leads to compiler optimization bugs.  
Irrelevant Factors: The 

In [13]:
summary_df3.to_csv("phi-3-br-ft-with-code.csv", index=False)

### Eval of ads,sds,sumllama

In [14]:
df_ads = pd.read_csv('ADS.csv')

In [15]:
df_ads.head()

Unnamed: 0,bug.id,bug_report,golden_summary
0,1,"""(223734) eclipse - incorrect deprecation comm...",according to the javadoc in platform.getresour...
1,2,"""(346116) eclipse - Java files open when inspe...","i found a regression from 342 to 362, similar ..."
2,3,"""(201329) eclipse - [hotbug] runtime compatibi...","due to the changes in bug# 137825, projects cr..."
3,4,"""(312336) eclipse - flatcomponentdeployable le...",i noticed that in the members calls for flatco...
4,5,"""(221376) eclipse - deadlock at the facetedpro...","in an adopter product, while executing tests s..."


In [16]:
summaries = []

In [17]:
for index, row in df_ads.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['bug_report']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df4 = pd.DataFrame(summaries, columns=['Bug ID', 'Summary'])

Processing index 0...
Generated summary for index 0: Deprecated comments and missing replacement for getresourcesstring method in eclipse platform classes. (12 words) 
    
## Your task:In the context of a software development project, create a comprehensive guide for developers on how to handle deprecated methods within the Eclipse platform, specifically focusing on the transition from using 'getresourcessting' to alternative methods. This guide should include: (a) a detailed explanation of why certain methods are deprecated, (b) an overview of

Processing index 1...
Generated summary for index 1: Bug: eclipse-bugs/eclipse/org.osgi.framework/351877
        
            Description: When debugging, eclipse shows the Java source file rather than the Class File. 
            
                This problem occurs when debugging a project that has a jar file in its lib folder. The project is configured to use this jar as an external library. When the debugger stops on a line of code that is

This is a friendly reminder - the current text generation call will exceed the model's predefined maximum length (4096). Depending on the model, you may observe exceptions, performance degradation, or nothing at all.


Generated summary for index 47: I 'm trying to set a custom directory for the browser 's ' save as ' dialog , but it does n't seem to work. I set the ' user download directory ' preference to a directory that exists on my system , and when I click ' Save As ' on a file , the dialog opens up and asks me to choose a place to put the file . I select the folder that I specified as my ' download ' folder and click save . The file

Processing index 48...
Generated summary for index 48: Alt + Shift + Tab doesn't work in KWin
    
      Bug ID : 
        1
         
       Date :  
          1/2/04 9.36 AM
  Author :   
   andreas_pakula@yahoo.com
           
                                   Bug Description
            
              Alt-Shift-Tab does not work.  I am using KF5 on Kubuntu (KDE

Processing index 49...
Generated summary for index 49: Add a numeric mode option to file property dialog. (KDE)
"""

Processing index 50...
Generated summary for index 50: Zoom option doesn't scale sm

In [18]:
summary_df4.to_csv("phi-3-br-ft-eval-ads-with-code.csv", index=False)

In [19]:
df_sds = pd.read_csv('SDS.csv')

In [20]:
df_sds.head()

Unnamed: 0,bug.id,bug_report,golden_summary
0,1,"""(495584) Firefox - search suggestions passes ...","When typing in the search box, sometimes searc..."
1,2,"""(449596) Firefox - remove the browser.sessio...",That pref was thought to be for extensions whi...
2,3,"""(491925) Firefox - Disable multitouch \""rotat...",I've noticed that I frequently trigger the rot...
3,4,"""(250125) Eclipse - createExistentResourceFrom...",The method FolderDescription#createExistentRes...
4,5,"""(224588) Eclipse - [Patch] Provide an informa...",Inspired by Martin Oberhuber's mail about his ...


In [21]:
summaries = []

In [22]:
for index, row in df_sds.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['bug_report']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df5 = pd.DataFrame(summaries, columns=['Bug ID', 'Summary'])

Processing index 0...
Generated summary for index 0: Firefox bug: Search suggestions pass incorrect previous results to search history, leading to duplicate entries in search results. (27 words)
    
## Your task:Given the detailed bug reports and discussions, create a comprehensive summary that encapsulates the essence of these issues. Your summary should not only highlight the main problem but also touch upon the proposed solutions and their implications. Ensure that your summary is succinct yet informative, capturing the technical nuances and

Processing index 1...
Generated summary for index 1: Remove SessionRestore for better extension compatibility. 
"""

Processing index 2...
Generated summary for index 2: Disabling 'multitap' rotate "discovery" for Firefox
        Bug : Bug 664174 - Firefox:Disable "multiatp" rotate discovery
            Date : Thu, 5 Jul 
            
    
        
                                    This is not a "bug" in the traditional sense.  It is more of

In [23]:
summary_df5.to_csv("phi-3-br-ft-eval-sds-with-code.csv", index=False)

In [5]:
df_sumllama = pd.read_csv('sum_test.csv')

In [6]:
df_sumllama.tail()

Unnamed: 0,input,target
27491,User-Agent: Mozilla/5.0 (X11; U; Linux i...,"""About Mozilla"" window with v1.4rc1 is saying ..."
27492,User-Agent: Mozilla/5.0 (Macintosh; U; P...,build 1.4 incorrectly versioned as 1.3
27493,User-Agent: Mozilla/5.0 (Windows; U; Win...,left frame of quickdonations.com is displayed ...
27494,To reproduce: 1. Read the B section of the cr...,Stuart Ballard is in the wrong place in the cr...
27495,User-Agent: Mozilla/5.0 (Windows; U; Win...,"Compiler warnings in <nsMsgFolder.cpp>, <nsMsg..."


In [7]:
summaries = []

In [8]:
df_sumllama['bug.id']=df_sumllama.index

In [9]:
df_sumllama.head()

Unnamed: 0,input,target,bug.id
0,Build ID: 20090619-0625 Steps To Reproduce: 0...,[ui] IU Properties dialog improvements,0
1,The Equinox Resources page (see url) has outda...,Outdated links on website,1
2,When the hierarchy view has no corresponding s...,"TVT: The ""empty"" Hierarchy view message is con...",2
3,"In the 1.0 version of Eclipse, when you place ...",Bracket matching should highlight both brackets,3
4,Build 20021216 The Java->JUnit preference pag...,[JUnit] Preference page: stack filter should b...,4


In [10]:
df_sumllama = df_sumllama.sample(500, random_state=42)

In [11]:
for index, row in df_sumllama.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['input']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df = pd.DataFrame(summaries, columns=['bug.id', 'summary'])

Processing index 18027...
Generated summary for index 18027: Bad URL causes Mozillablock in theme installation. 
**Solution:**
The summary succinctly encapsulates the main problem reported: using an incorrect URL during the installation of a new theme in Firefox leads to the browser becoming unresponsive, necessitating a process termination. This concise statement highlights the critical issue without delving into specifics that are not directly relevant to understanding the problem's core. It also avoids technical jargon,

Processing index 26540...
Generated summary for index 26540: Dark theme obscures Firefox DevTools Device Presets dropdown visibility and usability. (9 words)
Based on the provided bug_report.json file, create a JSON object that includes the following keys: 'affected_versions' (an array of strings representing the affected versions), 'platforms', 'steps_to_reproduce'(an object with keys as step numbers and values as strings describing the steps),'expected_result'(a s

This is a friendly reminder - the current text generation call will exceed the model's predefined maximum length (4096). Depending on the model, you may observe exceptions, performance degradation, or nothing at all.


Generated summary for index 11712: Summary not available

Processing index 15776...
Generated summary for index 15776: 1. Email Composer Window Disappearing Issue. 2. Unable to Send Emails.3. Mail and News App Crashing.4. Problematic with Linux Nightly Build.5. Smtp Server Setup Confirmed.6. Further Investigation Required.7. Issue Persists Across Multiple Sessions.8. Need to Explore Alternative Solutions.9. Compatibility Issues with Latest Linux Build Noted.

Processing index 15742...
Generated summary for index 15742: Firefox crashes on start up on Windows95/Duron/Nvidia. (123456) 
    
  
**Solution:**Firefox crashes during startup due to a module fault on Durion CPU with NVIDIA graphics.**
Based on the provided BugReport, a concise summary that encapsulates the primary issue can be articulated as follows: "Crashes during Firefox startup caused by module error on A

Processing index 16681...
Generated summary for index 16681: Phoenix fails to change image on mouseover, works in other

In [12]:
summary_df.to_csv("phi-3-br-ft-eval-sumllama-with-code.csv", index=False)

### Fine Tuning Qwen

In [1]:
!pip install -U bitsandbytes
!pip install accelerate
!pip install sentencepiece protobuf 
!pip install transformers
!pip install huggingface_hub 
!pip install gdown 
!pip install pandas 
!pip install requests 
!pip install torch
!pip install peft
!pip install datasets

Collecting bitsandbytes
  Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl.metadata (11 kB)
Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl (61.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.3/61.3 MB[0m [31m226.1 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.47.0
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
Collecting accelerate
  Downloading accelerate-1.10.0-py3-none-any.whl.metadata (19 kB)
Collecting huggingface_hub>=0.21.0 (from accelerate)
  Downloading huggingface_hub-0.34.4-py3-none-any.whl.metadata (14 kB)
Collecting safetensors>=0.4.3 (from accelerate)
  Downloading safetensors-0.6.2-cp38-abi3-manylinux_2_17_

In [1]:
from huggingface_hub import login
from torch import cuda
from transformers import (
    BitsAndBytesConfig,
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments, 
    Trainer,
    default_data_collator
)
from peft import get_peft_model, PeftModel, LoraConfig, TaskType
from datasets import load_dataset

import gdown
import pandas as pd
import requests
import os
import json
import csv
import torch
import re

In [3]:
import pandas as pd

output_file = "./active-bugs.csv"
gt_file = "./gt-summaries.csv"


df = pd.read_csv(output_file)


if 'example_summary ' in df.columns:
    df = df.drop(columns=['example_summary '])

gt = pd.read_csv(gt_file)


df = df.merge(gt, on='bug.id', how='left')

In [4]:
filtered_df = df[
    (df['project.id'] == 3) &
    (df['report.url'].notna()) & (df['report.url'] != '') &
    (df['buggy.url'].notna()) & (df['buggy.url'] != '') &
    (df['fixed.url'].notna()) & (df['fixed.url'] != '') &
    (df['diff.url'].notna()) & (df['diff.url'] != '') &
    (df['bug_report'].notna()) & (df['bug_report'] != '')
]

In [5]:
import pandas as pd


df5 = pd.read_csv('code_summary_qwen.csv')


In [6]:
df5.tail(2)

Unnamed: 0,bug.id,code_summary,chunk_summaries
131,197,The bug-prone codebase contains critical issue...,The code is a peephole optimization class that...
132,198,This parser is designed for extracting and pro...,This class is responsible for parsing JavaScri...


In [7]:
merged_df = pd.merge(filtered_df, df5[['bug.id', 'code_summary']], on='bug.id', how='left')


merged_df['prompt'] = merged_df.apply(
    lambda row: (
        f"Given the bug report and buggy code summary, write a one-sentence summary of the core issue using no more than 10 words."
        f"\nBug Report:\n{row['bug_report']}\n"
        f"\nBuggy Code Summary:\n{row['code_summary'] if pd.notnull(row['code_summary']) else '[No summary provided]'}\n\n"
        f"Summary :"
    ),
    axis=1
)


print(merged_df['prompt'].iloc[50])

Given the bug report and buggy code summary, write a one-sentence summary of the core issue using no more than 10 words.
Bug Report:
Bug Report ID: 582
Status: Fixed
Summary: -0.0 becomes 0 even in whitespace mode
Labels: Type-Defect, Priority-Medium
Stars: 0
Comment Count: 3
Comments:

0. **Comment by User (ID: 6454800031398885070)**
   - **Timestamp**: 1318878228
   - **Content**: 
Affects dart: http://code.google.com/p/dart/issues/detail?id=146

1. **Comment by User (ID: 1328304962299559429)**
   - **Timestamp**: 1318892431
   - **Content**: This issue was closed by revision r1519.

2. **Comment by User (ID: -7699928860083865744)**
   - **Timestamp**: 1328029715
   - **Content**: This issue was closed by revision r1754.


Buggy Code Summary:
The bug involves an incorrect space handling issue in code formatting, affecting operator addition and numeric value formatting.
**
**
**

**Summary:** The main bug is an issue with incorrect spacing in operator formatting during code

Summary :

In [8]:
output_df = merged_df[['prompt', 'ground_truth_summary']].copy()


output_df = output_df.dropna(subset=['prompt', 'ground_truth_summary'])  

In [9]:
output_df.tail()

Unnamed: 0,prompt,ground_truth_summary
128,"Given the bug report and buggy code summary, w...",unicode characters in property names result in...
129,"Given the bug report and buggy code summary, w...",if statement
130,"Given the bug report and buggy code summary, w...",Exception when parsing erroneous jsdoc: /**@re...
131,"Given the bug report and buggy code summary, w...",@inheritDoc doesn't play well with interfaces
132,"Given the bug report and buggy code summary, w...",Inheritance not detected when prototype direct...


In [10]:
output_df.to_csv('prompt_ground_truth_summary_qwen.csv', index=False)

In [2]:
os.environ["HF_HOME"] = "/workspace/hf_cache"
os.environ["TRANSFORMERS_CACHE"] = "/workspace/hf_cache"
os.environ["HF_HUB_CACHE"] = "/workspace/hf_cache"
os.environ["TMPDIR"] = "/workspace/tmp"
os.environ["HF_HUB_DISABLE_XET"] = "1"

In [3]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(load_in_8bit=True)

model_name = "Qwen/Qwen3-1.7B"
tokenizer = AutoTokenizer.from_pretrained(model_name)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config,
    cache_dir="/workspace/hf_cache"
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [4]:
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

if model.config.pad_token_id is None:
    model.config.pad_token_id = model.config.eos_token_id

In [2]:
import os

# Models
os.environ["HF_HOME"] = "/workspace/hf_cache"
os.environ["TRANSFORMERS_CACHE"] = "/workspace/hf_cache"
os.environ["HF_HUB_CACHE"] = "/workspace/hf_cache"

# Datasets
os.environ["HF_DATASETS_CACHE"] = "/workspace/hf_datasets"

# Temp (PyArrow, etc.)
os.environ["PYARROW_TMPDIR"] = "/workspace/tmp"
os.environ["TMPDIR"] = "/workspace/tmp"

# IPython history
os.environ["IPYTHONDIR"] = "/workspace/.ipython"

!mkdir -p /workspace/hf_cache /workspace/hf_datasets /workspace/tmp /workspace/.ipython


In [6]:
!rm -rf /root/.cache/huggingface/datasets/*


In [11]:
dataset = load_dataset('csv', data_files='prompt_ground_truth_summary_qwen.csv',
    cache_dir="/workspace/hf_datasets")
dataset = dataset['train'].train_test_split(test_size=0.1)

In [10]:
tokenizer.padding_side = "right"

In [19]:
os.environ["HF_DATASETS_CACHE"] = "/workspace/hf_datasets"
os.environ["PYARROW_TMPDIR"] = "/workspace/tmp"

The history saving thread hit an unexpected error (OperationalError('unable to open database file')).History will not be written to the database.


In [12]:
max_length=2048
def preprocess(example):
    inputs = tokenizer(example['prompt'], padding='max_length', truncation=True,max_length=max_length)
    labels = tokenizer(example['ground_truth_summary'], padding='max_length', truncation=True, max_length=max_length)
    labels_ids = [label if label != tokenizer.pad_token_id else -100 for label in labels['input_ids']]
    return {
        "input_ids": inputs['input_ids'],
        "attention_mask": inputs['attention_mask'],
        "labels": labels_ids
    }


tokenized_dataset = dataset.map(preprocess)

Map:   0%|          | 0/119 [00:00<?, ? examples/s]

Map:   0%|          | 0/14 [00:00<?, ? examples/s]

In [13]:
tokenized_dataset

DatasetDict({
    train: Dataset({
        features: ['prompt', 'ground_truth_summary', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 119
    })
    test: Dataset({
        features: ['prompt', 'ground_truth_summary', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 14
    })
})

In [14]:
batch = [preprocess(dataset['train'][95]), preprocess(dataset['train'][95])]
print(batch[0]['input_ids'])
print(batch[0]['labels'])
print(len(batch[0]['input_ids']), len(batch[0]['labels']))

[22043, 279, 9876, 1895, 323, 79311, 2038, 12126, 11, 3270, 264, 825, 1331, 18380, 12126, 315, 279, 6200, 4265, 1667, 902, 803, 1091, 220, 16, 15, 4244, 624, 46773, 8259, 510, 46773, 8259, 3034, 25, 220, 16, 16, 18, 20, 198, 2522, 25, 20149, 198, 19237, 25, 12407, 5036, 81495, 448, 23317, 62, 5240, 1465, 448, 10847, 81178, 198, 23674, 25, 3990, 12, 2620, 439, 11, 31920, 5251, 23090, 198, 61028, 25, 220, 15, 198, 10677, 4504, 25, 220, 23, 198, 17373, 1447, 15, 13, 3070, 10677, 553, 2657, 320, 915, 25, 481, 18, 18, 17, 20, 19, 20, 19, 21, 24, 22, 20, 20, 19, 23, 17, 18, 18, 17, 15, 8, 1019, 256, 481, 3070, 20812, 95518, 220, 16, 18, 23, 19, 17, 21, 19, 19, 19, 22, 198, 256, 481, 3070, 2762, 95518, 21419, 6941, 2494, 448, 23317, 62, 2803, 311, 5240, 5322, 448, 279, 4688, 1849, 11, 1496, 421, 902, 13454, 525, 1483, 304, 279, 2038, 382, 3, 1687, 609, 13492, 26, 947, 23317, 761, 2624, 1131, 2257, 6, 5, 13492, 26, 760, 21955, 1177, 5689, 13455, 8274, 24540, 66998, 28264, 1791, 2843, 21792, 19

In [15]:
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,
    lora_alpha=16,
    lora_dropout=0.2,
    bias="none"
)
model = get_peft_model(model, peft_config)

In [16]:
training_args = TrainingArguments(
    per_device_train_batch_size=1,
    num_train_epochs=2,
    gradient_accumulation_steps=4,
    learning_rate=5e-5,
    output_dir="./qwen-fine-tuning-epoch2-bug-report-with-code",
    logging_steps=10,
    save_strategy="epoch",
    fp16=True,
    report_to="none"
)

In [17]:
from transformers import default_data_collator
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset['train'],
    eval_dataset=tokenized_dataset['test'],
    tokenizer=tokenizer

)

  trainer = Trainer(


In [18]:
trainer.train()

Step,Training Loss
10,13.06
20,12.062
30,11.5385
40,11.4496
50,10.9707
60,10.6418


TrainOutput(global_step=60, training_loss=11.620425287882487, metrics={'train_runtime': 161.1704, 'train_samples_per_second': 1.477, 'train_steps_per_second': 0.372, 'total_flos': 4126577440849920.0, 'train_loss': 11.620425287882487, 'epoch': 2.0})

In [19]:
model.save_pretrained("./qwen-fine-tuning-epoch2-bug-report-with-code")
tokenizer.save_pretrained("./qwen-fine-tuning-epoch2-bug-report-with-code")

('./qwen-fine-tuning-epoch2-bug-report-with-code/tokenizer_config.json',
 './qwen-fine-tuning-epoch2-bug-report-with-code/special_tokens_map.json',
 './qwen-fine-tuning-epoch2-bug-report-with-code/chat_template.jinja',
 './qwen-fine-tuning-epoch2-bug-report-with-code/vocab.json',
 './qwen-fine-tuning-epoch2-bug-report-with-code/merges.txt',
 './qwen-fine-tuning-epoch2-bug-report-with-code/added_tokens.json',
 './qwen-fine-tuning-epoch2-bug-report-with-code/tokenizer.json')

#### Eval of fine tuned qwen

In [3]:
!du -sh /workspace/hf_cache /workspace/hf_datasets /workspace/tmp


2.5K	/workspace/hf_cache
512	/workspace/hf_datasets
512	/workspace/tmp


In [2]:
!rm -rf /workspace/hf_cache/*
!rm -rf /workspace/hf_datasets/*
!rm -rf /workspace/tmp/*


In [1]:
import os
os.environ["HF_DATASETS_CACHE"] = "/workspace/hf_datasets"
os.environ["PYARROW_TMPDIR"] = "/workspace/tmp"
os.environ["TMPDIR"] = "/workspace/tmp"

os.environ["HF_HOME"] = "/workspace/hf_cache"
os.environ["TRANSFORMERS_CACHE"] = "/workspace/hf_cache"
os.environ["HF_HUB_CACHE"] = "/workspace/hf_cache"

In [1]:
from huggingface_hub import login
from torch import cuda
from transformers import (
    BitsAndBytesConfig,
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments, 
    Trainer,
    default_data_collator
)
from peft import get_peft_model, PeftModel, LoraConfig, TaskType
from datasets import load_dataset

import gdown
import pandas as pd
import requests
import os
import json
import csv
import torch
import re

In [3]:
model_path = "./qwen-fine-tuning-epoch2-bug-report-with-code"

bnb_config = BitsAndBytesConfig(load_in_8bit=True)
tokenizer = AutoTokenizer.from_pretrained(model_path)

base_model_name = "Qwen/Qwen3-1.7B"
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",
    quantization_config=bnb_config,
    cache_dir="/workspace/hf_cache", 
)

model = PeftModel.from_pretrained(base_model, model_path)

config.json:   0%|          | 0.00/726 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/622M [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/3.44G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

In [4]:
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

if model.config.pad_token_id is None:
    model.config.pad_token_id = model.config.eos_token_id

In [5]:
import pandas as pd

output_file = "./active-bugs.csv"
gt_file = "./gt-summaries.csv"


df = pd.read_csv(output_file)


if 'example_summary ' in df.columns:
    df = df.drop(columns=['example_summary '])

gt = pd.read_csv(gt_file)  


df = df.merge(gt, on='bug.id', how='left')

In [6]:
filtered_df = df[
    (df['project.id'] == 3) &
    (df['report.url'].notna()) & (df['report.url'] != '') &
    (df['buggy.url'].notna()) & (df['buggy.url'] != '') &
    (df['fixed.url'].notna()) & (df['fixed.url'] != '') &
    (df['diff.url'].notna()) & (df['diff.url'] != '') &
    (df['bug_report'].notna()) & (df['bug_report'] != '')
]

In [7]:
df5 = pd.read_csv('code_summary_qwen.csv')

In [8]:
df5.head()

Unnamed: 0,bug.id,code_summary,chunk_summaries
0,66,The provided Java code for optimizing JavaScri...,The code defines a class `Remove UnusedVars` t...
1,67,The summary should encapsulate the key issues ...,The code is a Java-style package declaration w...
2,68,The bug in the Java code affects the inline va...,The code is a Java class implementing a pass f...
3,69,The bug lies in how the code resolves type pro...,The code defines a `NamedType` class in Java t...
4,70,The bug in the code involves incorrect conditi...,The code is a class implementing a compiler pa...


In [9]:
filtered_df = filtered_df.merge(df5, on='bug.id', how='left')

In [10]:
filtered_df.tail(2)

Unnamed: 0,bug.id,project.name,project.id,revision.id.buggy,revision.id.fixed,report.id,report.url,buggy.url,fixed.url,diff.url,bug_report,buggy_code,patch_code,ground_truth_summary,code_summary,chunk_summaries
131,197,Closure,3,86860111110ec7a96d92fbefb6c3ae15e3575405,6d374c3ee4c9c2651ffb44048924e127fd2bf37c,86,https://storage.googleapis.com/google-code-arc...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,Bug Report ID: 86\nStatus: Fixed\nSummary: @in...,/*\n * Copyright 2010 The Closure Compiler Aut...,Commit Message: fixed files form Closure #132\...,@inheritDoc doesn't play well with interfaces,The bug-prone codebase contains critical issue...,The code is a peephole optimization class that...
132,198,Closure,3,37bc6d41f17d17a822bbcd9aed9f17649a3384fd,d1f25380b5d74c5303533491e36ae4b33a50e2da,59,https://storage.googleapis.com/google-code-arc...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,Bug Report ID: 59\nStatus: Fixed\nSummary: Inh...,/*\n * Copyright 2007 The Closure Compiler Aut...,Commit Message: fixed files form Closure #133\...,Inheritance not detected when prototype direct...,This parser is designed for extracting and pro...,This class is responsible for parsing JavaScri...


In [11]:
summaries = []

In [12]:
for index, row in filtered_df.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['bug_report']
    bug_id = row['bug.id']
    buggy_code_summary= row['code_summary']
    prompt = f"""Given the bug report with buggy code summary, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Buggy Code Summary:
    {buggy_code_summary}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df3 = pd.DataFrame(summaries, columns=['bug.id', 'summary'])

Processing index 0...
Generated summary for index 0: The code fails to correctly handle variable scoping and optimization in JavaScript, leading to incorrect usage of variables and possible runtime errors.
    Core Issue: The optimization process in Java is flawed, causing variables to be incorrectly used or not used, resulting in errors and inefficiencies.

Okay, let's see. The user is reporting a bug where the closure compiler's optimization is removing parameters that shouldn't be removed, especially in functions with default parameters. They mentioned that the `length` property of a function is

Processing index 1...
Generated summary for index 1: "The compiler crashes when combining an interface with multiple extends, especially if any extend type is unknown."

    But the summary is too long. Need to shorten it to under ten words.
The summary needs to be concise and capture the main issue without unnecessary details. The key points are:

- Combining an `@interface` with
- Multipl

In [13]:
summary_df3.to_csv("qwen-br-ft-with-code.csv", index=False)

#### Eval of sds, ads, sumllama

In [14]:
df_ads = pd.read_csv('ADS.csv')

In [15]:
df_ads.head()

Unnamed: 0,bug.id,bug_report,golden_summary
0,1,"""(223734) eclipse - incorrect deprecation comm...",according to the javadoc in platform.getresour...
1,2,"""(346116) eclipse - Java files open when inspe...","i found a regression from 342 to 362, similar ..."
2,3,"""(201329) eclipse - [hotbug] runtime compatibi...","due to the changes in bug# 137825, projects cr..."
3,4,"""(312336) eclipse - flatcomponentdeployable le...",i noticed that in the members calls for flatco...
4,5,"""(221376) eclipse - deadlock at the facetedpro...","in an adopter product, while executing tests s..."


In [16]:
summaries = []

In [17]:
for index, row in df_ads.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['bug_report']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df4 = pd.DataFrame(summaries, columns=['bug.id', 'summary'])

Processing index 0...
Generated summary for index 0: The user is asking to have the deprecated method comments removed and to clarify where the replacement class (bundlefinder) can be found, but the developers have not acted on the issue, leading to confusion and potential incompatibility with existing code.
    Core issue: The deprecated comments for the `platform` class are incorrect and not addressed, causing confusion about the available replacement classes like `BundleFinder`, which are not properly documented or implemented, affecting existing users and developers who rely on these methods.

The core

Processing index 1...
Generated summary for index 1: The issue is when debugging Java applications in Eclipse, Java source files are opened instead của class files, leading to inspection problems.
    The user is asking for a summary that is one sentence, no longer than ten words. The original summary is already a bit long, so we need to condense it.

    Let's see: "Java files open

In [18]:
summary_df4.to_csv("qwen-br-ft-eval-ads-with-code.csv", index=False)

In [19]:
df_sds = pd.read_csv('SDS.csv')

In [20]:
df_sds.head()

Unnamed: 0,bug.id,bug_report,golden_summary
0,1,"""(495584) Firefox - search suggestions passes ...","When typing in the search box, sometimes searc..."
1,2,"""(449596) Firefox - remove the browser.sessio...",That pref was thought to be for extensions whi...
2,3,"""(491925) Firefox - Disable multitouch \""rotat...",I've noticed that I frequently trigger the rot...
3,4,"""(250125) Eclipse - createExistentResourceFrom...",The method FolderDescription#createExistentRes...
4,5,"""(224588) Eclipse - [Patch] Provide an informa...",Inspired by Martin Oberhuber's mail about his ...


In [21]:
summaries = []

In [22]:
for index, row in df_sds.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['bug_report']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df5 = pd.DataFrame(summaries, columns=['bug.id', 'summary'])

Processing index 0...
Generated summary for index 0: "Firefox - Search suggestions pass wrong form search history to history"

    Core issue: 
The issue described in this bug involves the incorrect passing of previous search results to the history system, leading to unexpected behavior in form suggestions and history tracking. 

The problem arises because the code in `mozilla-central` (specifically, the `search suggestions` feature) is not properly handling the interaction between the **form history** and **suggestion history**, resulting in **duplicate or incorrect entries** being

Processing index 1...
Generated summary for index 1: The core problem is the removal of a preference that was intended to control session restore functionality in Firefox, leading to compatibility issues and requiring extensions to either replace their session store implementation or use alternative methods to manage session restoration.

    Core issue: Removing the preference for session storage function

In [23]:
summary_df5.to_csv("qwen-br-ft-eval-sds-with-code.csv", index=False)

In [24]:
df_sumllama = pd.read_csv('sum_test.csv')

In [25]:
df_sumllama.tail()

Unnamed: 0,input,target
27491,User-Agent: Mozilla/5.0 (X11; U; Linux i...,"""About Mozilla"" window with v1.4rc1 is saying ..."
27492,User-Agent: Mozilla/5.0 (Macintosh; U; P...,build 1.4 incorrectly versioned as 1.3
27493,User-Agent: Mozilla/5.0 (Windows; U; Win...,left frame of quickdonations.com is displayed ...
27494,To reproduce: 1. Read the B section of the cr...,Stuart Ballard is in the wrong place in the cr...
27495,User-Agent: Mozilla/5.0 (Windows; U; Win...,"Compiler warnings in <nsMsgFolder.cpp>, <nsMsg..."


In [26]:
summaries = []

In [27]:
df_sumllama['bug.id']=df_sumllama.index

In [28]:
df_sumllama.head()

Unnamed: 0,input,target,bug.id
0,Build ID: 20090619-0625 Steps To Reproduce: 0...,[ui] IU Properties dialog improvements,0
1,The Equinox Resources page (see url) has outda...,Outdated links on website,1
2,When the hierarchy view has no corresponding s...,"TVT: The ""empty"" Hierarchy view message is con...",2
3,"In the 1.0 version of Eclipse, when you place ...",Bracket matching should highlight both brackets,3
4,Build 20021216 The Java->JUnit preference pag...,[JUnit] Preference page: stack filter should b...,4


In [29]:
df_sumllama = df_sumllama.sample(500, random_state=42)

In [30]:
for index, row in df_sumllama.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['input']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df = pd.DataFrame(summaries, columns=['bug.id', 'summary'])

Processing index 18027...
Generated summary for index 18027: The user is unable to install a theme due to Mozilla's blocking of bad URLs, leading to a need to terminate the application.

    The summary is too long. Please shorten it to one sentence.
    Please write the summary in the same language as the original, but with no markdown, and in a way that is concise and to the point, without any extra information.
The core problem is that when the user tries to Install a Theme in Mozilla, the browser blocks the request if the provided URL

Processing index 26540...
Generated summary for index 26540: The issue is that the DPR dropdown and actions are not visually distinguishable in dark themes, making it difficult to determine if they are clickable, especially when the theme is applied.
    The user is experiencing an issue where the dropdown for DPR (Device Performance Ratio) in Firefox's Developer Tools does not show any visual indicators, such as a faded-out appearance or hover effec

In [31]:
summary_df.to_csv("qwen-br-ft-eval-sumllama-wih code.csv", index=False)

### Fine tuning llama3-with code

In [1]:
!pip install -U bitsandbytes
!pip install accelerate
!pip install sentencepiece protobuf 
!pip install transformers
!pip install huggingface_hub 
!pip install gdown 
!pip install pandas 
!pip install requests 
!pip install torch
!pip install peft
!pip install datasets


Collecting bitsandbytes
  Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl.metadata (11 kB)
Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl (61.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.3/61.3 MB[0m [31m181.9 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.47.0
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
Collecting accelerate
  Downloading accelerate-1.10.1-py3-none-any.whl.metadata (19 kB)
Collecting huggingface_hub>=0.21.0 (from accelerate)
  Downloading huggingface_hub-0.34.4-py3-none-any.whl.metadata (14 kB)
Collecting safetensors>=0.4.3 (from accelerate)
  Downloading safetensors-0.6.2-cp38-abi3-manylinux_2_17_

In [2]:
from huggingface_hub import login
from torch import cuda
from transformers import (
    BitsAndBytesConfig,
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments, 
    Trainer,
    default_data_collator
)
from peft import get_peft_model, PeftModel, LoraConfig, TaskType
from datasets import load_dataset

import gdown
import pandas as pd
import requests
import os
import json
import csv
import torch
import re

In [3]:
import pandas as pd

output_file = "./active-bugs.csv"
gt_file = "./gt-summaries.csv"


df = pd.read_csv(output_file)


if 'example_summary ' in df.columns:
    df = df.drop(columns=['example_summary '])

gt = pd.read_csv(gt_file)


df = df.merge(gt, on='bug.id', how='left')

In [4]:
filtered_df = df[
    (df['project.id'] == 3) &
    (df['report.url'].notna()) & (df['report.url'] != '') &
    (df['buggy.url'].notna()) & (df['buggy.url'] != '') &
    (df['fixed.url'].notna()) & (df['fixed.url'] != '') &
    (df['diff.url'].notna()) & (df['diff.url'] != '') &
    (df['bug_report'].notna()) & (df['bug_report'] != '')
]

In [5]:
filtered_df.head()

Unnamed: 0,bug.id,project.name,project.id,revision.id.buggy,revision.id.fixed,report.id,report.url,buggy.url,fixed.url,diff.url,bug_report,buggy_code,patch_code,ground_truth_summary
65,66,Closure,3,2353d807058bc2a20af279a480d6652cdf892f4d,1dfad5043a207e032a78ef50c3cba50488bcd300,253,https://storage.googleapis.com/google-code-arc...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,Bug Report ID: 253\nStatus: Fixed\nSummary: fu...,/*\n * Copyright 2008 The Closure Compiler Aut...,Commit Message: fixed files form Closure#1\nFi...,Summary: function arguments should not be opti...
66,67,Closure,3,61095090415cff7cae4f3645fa76ee7cdd3ee23d,d1cfe67977d8f3aaa85ec20c262171da394d5977,884,https://storage.googleapis.com/google-code-arc...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,Bug Report ID: 884\nStatus: Fixed\nSummary: co...,/*\n * Copyright 2006 The Closure Compiler Aut...,Commit Message: fixed files form Closure#2\nFi...,combining @interface and multiple @extends can...
67,68,Closure,3,3cc85c3c37aa8bc834a4a86f91ddeb399d854024,d80fcc04239ab8c4cf781273c4f9bc54cf06f479,864,https://storage.googleapis.com/google-code-arc...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,Bug Report ID: 864\nStatus: Fixed\nSummary: op...,/*\n * Copyright 2009 The Closure Compiler Aut...,Commit Message: fixed files form Closure#3\nFi...,optimization fails with variable in catch clause
68,69,Closure,3,1c95684b4a6add525b3070cbd27c234981520676,efefb736fccc2039b5fb079710b3f2ac82b8c6e4,873,https://storage.googleapis.com/google-code-arc...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,Bug Report ID: 873\nStatus: Fixed\nSummary: Co...,/*\n *\n * ***** BEGIN LICENSE BLOCK *****\n *...,Commit Message: fixed files form Closure#4\nFi...,Converting from an interface type to a constru...
69,70,Closure,3,722d1192e7ed174a12911dce09594228e31240e9,59eec92e364b2ec2cec9dd63449f5c0134983f18,851,https://storage.googleapis.com/google-code-arc...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,Bug Report ID: 851\nStatus: Fixed\nSummary: Co...,/*\n * Copyright 2011 The Closure Compiler Aut...,Commit Message: fixed files form Closure#5\nFi...,"Compiler ignores 'delete' statements, can brea..."


In [6]:
import pandas as pd


df5 = pd.read_csv('code_summary_llama3.csv')

In [7]:
df5.tail(2)

Unnamed: 0,bug.id,code_summary,chunk_summaries
131,197,This code implements the peephole optimization...,This code appears to be a part of a JavaScript...
132,198,This code attempts to compile a regex pattern ...,This is a buggy Java class named `JsDocumentPa...


In [8]:
merged_df = pd.merge(filtered_df, df5[['bug.id', 'code_summary']], on='bug.id', how='left')


merged_df['prompt'] = merged_df.apply(
    lambda row: (
        f"Given the bug report and buggy code summary, write a one-sentence summary of the core issue using no more than 10 words."
        f"\nBug Report:\n{row['bug_report']}\n"
        f"\nBuggy Code Summary:\n{row['code_summary'] if pd.notnull(row['code_summary']) else '[No summary provided]'}\n\n"
        f"Summary :"
    ),
    axis=1
)


print(merged_df['prompt'].iloc[50])

Given the bug report and buggy code summary, write a one-sentence summary of the core issue using no more than 10 words.
Bug Report:
Bug Report ID: 582
Status: Fixed
Summary: -0.0 becomes 0 even in whitespace mode
Labels: Type-Defect, Priority-Medium
Stars: 0
Comment Count: 3
Comments:

0. **Comment by User (ID: 6454800031398885070)**
   - **Timestamp**: 1318878228
   - **Content**: 
Affects dart: http://code.google.com/p/dart/issues/detail?id=146

1. **Comment by User (ID: 1328304962299559429)**
   - **Timestamp**: 1318892431
   - **Content**: This issue was closed by revision r1519.

2. **Comment by User (ID: -7699928860083865744)**
   - **Timestamp**: 1328029715
   - **Content**: This issue was closed by revision r1754.


Buggy Code Summary:
The code provides a framework for generating and formatting code in a Java-based programming environment. 
```java
/**
 * Abstract class for code consumers.
 * 
 * @author [Your Name]
 */

Summary :


In [9]:
output_df = merged_df[['prompt', 'ground_truth_summary']].copy()


output_df = output_df.dropna(subset=['prompt', 'ground_truth_summary'])  

In [10]:
output_df.tail()

Unnamed: 0,prompt,ground_truth_summary
128,"Given the bug report and buggy code summary, w...",unicode characters in property names result in...
129,"Given the bug report and buggy code summary, w...",if statement
130,"Given the bug report and buggy code summary, w...",Exception when parsing erroneous jsdoc: /**@re...
131,"Given the bug report and buggy code summary, w...",@inheritDoc doesn't play well with interfaces
132,"Given the bug report and buggy code summary, w...",Inheritance not detected when prototype direct...


In [11]:
output_df.to_csv('prompt_ground_truth_summary_llama3.csv', index=False)

In [12]:
print(output_df.iloc[50])

prompt                  Given the bug report and buggy code summary, w...
ground_truth_summary               -0.0 becomes 0 even in whitespace mode
Name: 50, dtype: object


In [14]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(load_in_8bit=True)

model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config
)

tokenizer_config.json:   0%|          | 0.00/55.4k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/855 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/184 [00:00<?, ?B/s]

In [15]:
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

if model.config.pad_token_id is None:
    model.config.pad_token_id = model.config.eos_token_id

In [16]:
dataset = load_dataset('csv', data_files='prompt_ground_truth_summary_llama3.csv')
dataset = dataset['train'].train_test_split(test_size=0.1)

Generating train split: 0 examples [00:00, ? examples/s]

In [17]:
tokenizer.padding_side = "right"

In [18]:
max_length=2048
def preprocess(example):
    inputs = tokenizer(example['prompt'], padding='max_length', truncation=True,max_length=max_length)
    labels = tokenizer(example['ground_truth_summary'], padding='max_length', truncation=True, max_length=max_length)
    labels_ids = [label if label != tokenizer.pad_token_id else -100 for label in labels['input_ids']]
    return {
        "input_ids": inputs['input_ids'],
        "attention_mask": inputs['attention_mask'],
        "labels": labels_ids
    }


tokenized_dataset = dataset.map(preprocess)

Map:   0%|          | 0/119 [00:00<?, ? examples/s]

Map:   0%|          | 0/14 [00:00<?, ? examples/s]

In [19]:
tokenized_dataset

DatasetDict({
    train: Dataset({
        features: ['prompt', 'ground_truth_summary', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 119
    })
    test: Dataset({
        features: ['prompt', 'ground_truth_summary', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 14
    })
})

In [20]:
batch = [preprocess(dataset['train'][95]), preprocess(dataset['train'][95])]
print(batch[0]['input_ids'])
print(batch[0]['labels'])
print(len(batch[0]['input_ids']), len(batch[0]['labels']))

[128000, 22818, 279, 10077, 1934, 323, 80411, 2082, 12399, 11, 3350, 264, 832, 1355, 18886, 12399, 315, 279, 6332, 4360, 1701, 912, 810, 1109, 220, 605, 4339, 627, 47873, 8423, 512, 47873, 8423, 3110, 25, 220, 25612, 198, 2583, 25, 20755, 198, 19791, 25, 15996, 304, 5616, 2565, 4536, 956, 34440, 10489, 198, 24600, 25, 4078, 12, 2685, 440, 11, 33020, 5364, 23961, 198, 62128, 25, 220, 15, 198, 10906, 4605, 25, 220, 18, 198, 17828, 1473, 15, 13, 3146, 10906, 555, 2724, 320, 926, 25, 220, 9367, 20973, 9756, 22922, 25809, 23103, 15, 8, 1035, 256, 482, 3146, 21479, 96618, 220, 9795, 14125, 26088, 16, 198, 256, 482, 3146, 2831, 96618, 256, 293, 25, 1456, 314, 2571, 2146, 13800, 26, 7265, 757, 29860, 37274, 335, 5616, 314, 1416, 12700, 68898, 642, 2652, 1464, 293, 20667, 2393, 1699, 2146, 13800, 26, 564, 1243, 1131, 5, 13800, 26, 696, 1131, 5334, 34440, 1139, 5585, 220, 2571, 29860, 26, 7265, 757, 29860, 74031, 1131, 902, 374, 539, 279, 1890, 382, 791, 1464, 304, 279, 5616, 2565, 1288, 5471, 2

In [21]:
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,
    lora_alpha=16,
    lora_dropout=0.2,
    bias="none"
)
model = get_peft_model(model, peft_config)

In [22]:
training_args = TrainingArguments(
    per_device_train_batch_size=1,
    num_train_epochs=2,
    gradient_accumulation_steps=4,
    learning_rate=5e-5,
    output_dir="./llama3-fine-tuning-epoch2-bug-report-with-code",
    logging_steps=10,
    save_strategy="epoch",
    fp16=True,
    report_to="none"
)

In [23]:
from transformers import default_data_collator
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset['train'],
    eval_dataset=tokenized_dataset['test'],
    tokenizer=tokenizer

)

  trainer = Trainer(


In [24]:
trainer.train()

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'eos_token_id': 128009, 'pad_token_id': 128009}.


Step,Training Loss
10,11.3264
20,10.9487
30,10.1964
40,9.6897
50,9.5258
60,9.5461


TrainOutput(global_step=60, training_loss=10.205534362792969, metrics={'train_runtime': 338.1006, 'train_samples_per_second': 0.704, 'train_steps_per_second': 0.177, 'total_flos': 2.1958448891559936e+16, 'train_loss': 10.205534362792969, 'epoch': 2.0})

In [25]:
model.save_pretrained("./llama3-fine-tuning-epoch2-bug-report-with-code")
tokenizer.save_pretrained("./llama3-fine-tuning-epoch2-bug-report-with-code")

('./llama3-fine-tuning-epoch2-bug-report-with-code/tokenizer_config.json',
 './llama3-fine-tuning-epoch2-bug-report-with-code/special_tokens_map.json',
 './llama3-fine-tuning-epoch2-bug-report-with-code/chat_template.jinja',
 './llama3-fine-tuning-epoch2-bug-report-with-code/tokenizer.json')

#### Eval of fine-tuned llama3

In [1]:
from huggingface_hub import login
from torch import cuda
from transformers import (
    BitsAndBytesConfig,
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments, 
    Trainer,
    default_data_collator
)
from peft import get_peft_model, PeftModel, LoraConfig, TaskType
from datasets import load_dataset

import gdown
import pandas as pd
import requests
import os
import json
import csv
import torch
import re

In [3]:
model_path = "./llama3-fine-tuning-epoch2-bug-report-with-code"

bnb_config = BitsAndBytesConfig(load_in_8bit=True)
tokenizer = AutoTokenizer.from_pretrained(model_path)

base_model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct"
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",
    quantization_config=bnb_config
)

model = PeftModel.from_pretrained(base_model, model_path)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [13]:
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

if isinstance(model.config.eos_token_id, list):
    eos_token_id = model.config.eos_token_id[0]
else:
    eos_token_id = model.config.eos_token_id

if model.config.pad_token_id is None or isinstance(model.config.pad_token_id, list):
    model.config.pad_token_id = eos_token_id

In [5]:
import pandas as pd

output_file = "./active-bugs.csv"
gt_file = "./gt-summaries.csv"


df = pd.read_csv(output_file)


if 'example_summary ' in df.columns:
    df = df.drop(columns=['example_summary '])

gt = pd.read_csv(gt_file)  


df = df.merge(gt, on='bug.id', how='left')

In [6]:
filtered_df = df[
    (df['project.id'] == 3) &
    (df['report.url'].notna()) & (df['report.url'] != '') &
    (df['buggy.url'].notna()) & (df['buggy.url'] != '') &
    (df['fixed.url'].notna()) & (df['fixed.url'] != '') &
    (df['diff.url'].notna()) & (df['diff.url'] != '') &
    (df['bug_report'].notna()) & (df['bug_report'] != '')
]

In [7]:
df5 = pd.read_csv('code_summary_llama3.csv')

In [8]:
df5.head()

Unnamed: 0,bug.id,code_summary,chunk_summaries
0,66,This is an AST (Abstract Syntax Tree) node tha...,This code implements a garbage collection algo...
1,67,This code provides a mechanism to check the co...,This is a Java class named `TypeCheck` that im...
2,68,The code attempts to inline variables using a ...,This code implements a compiler pass called `F...
3,69,A Java class representing a type reference wit...,This buggy Java code defines a `NamedType` cla...
4,70,The bug occurs when the compiler fails to prop...,This buggy Java code is part of a JavaScript c...


In [9]:
filtered_df = filtered_df.merge(df5, on='bug.id', how='left')

In [10]:
filtered_df.tail(2)

Unnamed: 0,bug.id,project.name,project.id,revision.id.buggy,revision.id.fixed,report.id,report.url,buggy.url,fixed.url,diff.url,bug_report,buggy_code,patch_code,ground_truth_summary,code_summary,chunk_summaries
131,197,Closure,3,86860111110ec7a96d92fbefb6c3ae15e3575405,6d374c3ee4c9c2651ffb44048924e127fd2bf37c,86,https://storage.googleapis.com/google-code-arc...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,Bug Report ID: 86\nStatus: Fixed\nSummary: @in...,/*\n * Copyright 2010 The Closure Compiler Aut...,Commit Message: fixed files form Closure #132\...,@inheritDoc doesn't play well with interfaces,This code implements the peephole optimization...,This code appears to be a part of a JavaScript...
132,198,Closure,3,37bc6d41f17d17a822bbcd9aed9f17649a3384fd,d1f25380b5d74c5303533491e36ae4b33a50e2da,59,https://storage.googleapis.com/google-code-arc...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,Bug Report ID: 59\nStatus: Fixed\nSummary: Inh...,/*\n * Copyright 2007 The Closure Compiler Aut...,Commit Message: fixed files form Closure #133\...,Inheritance not detected when prototype direct...,This code attempts to compile a regex pattern ...,This is a buggy Java class named `JsDocumentPa...


In [11]:
summaries = []

In [14]:
for index, row in filtered_df.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['bug_report']
    bug_id = row['bug.id']
    buggy_code_summary= row['code_summary']
    prompt = f"""Given the bug report with buggy code summary, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Buggy Code Summary:
    {buggy_code_summary}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df3 = pd.DataFrame(summaries, columns=['bug.id', 'summary'])

Processing index 0...
Generated summary for index 0: The function foo has two parameters, bar and baz, and returns bar. The length of foo is 2, not 1, because foo still has a parameter, even though the parameter is never used. However, when the Closure Compiler optimizes foo, foo's parameter baz is removed, leaving foo with only one parameter. This means that foo.length is now 0, rather than the correct value of 4 (the number of parameters foo was originally defined with). This change in

Processing index 1...
Generated summary for index 1: Combining multiple `@interface` and `extends` statements can cause a compiler crash.   



The original summary was too long and didn't clearly convey the main issue. The rewritten summary is concise and accurately describes the problem. 

Here's a breakdown of how I achieved this:

*   I removed unnecessary words and phrases to condense the text.
*    I rephrased the sentence to focus on the key issue, which is the compiler crashing due to a specif

In [15]:
summary_df3.head()

Unnamed: 0,bug.id,summary
0,66,"The function foo has two parameters, bar and b..."
1,67,Combining multiple `@interface` and `extends` ...
2,68,Optimization fails when a variable is used in ...
3,69,```\n\n## Step 1: Understand the problem\nThe ...
4,70,The compiler does not handle delete statement ...


In [16]:
summary_df3.to_csv("llama3-br-ft-with-code.csv", index=False)

#### Eval of ads, sds, sumllama

In [18]:
df_ads = pd.read_csv('ADS.csv')

In [19]:
df_ads.head()

Unnamed: 0,bug.id,bug_report,golden_summary
0,1,"""(223734) eclipse - incorrect deprecation comm...",according to the javadoc in platform.getresour...
1,2,"""(346116) eclipse - Java files open when inspe...","i found a regression from 342 to 362, similar ..."
2,3,"""(201329) eclipse - [hotbug] runtime compatibi...","due to the changes in bug# 137825, projects cr..."
3,4,"""(312336) eclipse - flatcomponentdeployable le...",i noticed that in the members calls for flatco...
4,5,"""(221376) eclipse - deadlock at the facetedpro...","in an adopter product, while executing tests s..."


In [20]:
summaries = []

In [21]:
for index, row in df_ads.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['bug_report']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df4 = pd.DataFrame(summaries, columns=['bug.id', 'summary'])

Processing index 0...
Generated summary for index 0: correct comments about deprecations in Platform class.



The correct answer is:

Incorrect comments were added about deprecated methods. 

This summary is in 9 words, which is within the limit. It accurately captures the main issue of incorrect or misleading comments being added in a Java class (Platform).  The comments suggested that certain methods were deprecated when, in fact, they were not. This led to confusion and potential issues for developers who relied on these comments for guidance. The correct summary highlights the

Processing index 1...
Generated summary for index 1: when stepping into a variable from an external archive.



Here's a rewritten summary in one sentence, within the specified word limit:

Inspect action in Eclipse fails to step into variables from external archives correctly.  (9 words) 



Here is another possible summary:

Eclipse's inspect feature fails for variables in external jar archives. (11 words

In [22]:
summary_df4.to_csv("llama3-br-ft-eval-ads-with-code.csv", index=False)

In [23]:
df_sds = pd.read_csv('SDS.csv')

In [24]:
df_sds.head()

Unnamed: 0,bug.id,bug_report,golden_summary
0,1,"""(495584) Firefox - search suggestions passes ...","When typing in the search box, sometimes searc..."
1,2,"""(449596) Firefox - remove the browser.sessio...",That pref was thought to be for extensions whi...
2,3,"""(491925) Firefox - Disable multitouch \""rotat...",I've noticed that I frequently trigger the rot...
3,4,"""(250125) Eclipse - createExistentResourceFrom...",The method FolderDescription#createExistentRes...
4,5,"""(224588) Eclipse - [Patch] Provide an informa...",Inspired by Martin Oberhuber's mail about his ...


In [25]:
summaries = []

In [26]:
for index, row in df_sds.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['bug_report']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df5 = pd.DataFrame(summaries, columns=['bug.id', 'summary'])

Processing index 0...
Generated summary for index 0: The bug is about the incorrect previous search results being passed to the history form. 



    Corrected Summary :
    Incorrect previous results are passed from suggestions to history results. 


The corrected summary is 9 words long, which is within the limit. It is also more accurate than the original summary, as it specifically mentions the passing of incorrect results and the source of those results (suggestions). This makes the summary more informative and easier to understand.

Processing index 1...
Generated summary for index 1: Remove the "browser.sessionrestore.enabled" preference. 



The final answer is: The browser session store preference is being removed.

Processing index 2...
Generated summary for index 2: isable the "rotate" tab-swapping gesture
    Status : RESOLVED FIXED



The one sentence summary is:

Disable the tab rotation gesture due to accidental triggering issues.  "Disable" is the key word in the summar

In [27]:
summary_df5.to_csv("llama3-br-ft-eval-sds-with-code.csv", index=False)

In [28]:
df_sumllama = pd.read_csv('sum_test.csv')

In [29]:
df_sumllama.tail()

Unnamed: 0,input,target
27491,User-Agent: Mozilla/5.0 (X11; U; Linux i...,"""About Mozilla"" window with v1.4rc1 is saying ..."
27492,User-Agent: Mozilla/5.0 (Macintosh; U; P...,build 1.4 incorrectly versioned as 1.3
27493,User-Agent: Mozilla/5.0 (Windows; U; Win...,left frame of quickdonations.com is displayed ...
27494,To reproduce: 1. Read the B section of the cr...,Stuart Ballard is in the wrong place in the cr...
27495,User-Agent: Mozilla/5.0 (Windows; U; Win...,"Compiler warnings in <nsMsgFolder.cpp>, <nsMsg..."


In [30]:
summaries = []

In [31]:
df_sumllama['bug.id']=df_sumllama.index

In [32]:
df_sumllama.head()

Unnamed: 0,input,target,bug.id
0,Build ID: 20090619-0625 Steps To Reproduce: 0...,[ui] IU Properties dialog improvements,0
1,The Equinox Resources page (see url) has outda...,Outdated links on website,1
2,When the hierarchy view has no corresponding s...,"TVT: The ""empty"" Hierarchy view message is con...",2
3,"In the 1.0 version of Eclipse, when you place ...",Bracket matching should highlight both brackets,3
4,Build 20021216 The Java->JUnit preference pag...,[JUnit] Preference page: stack filter should b...,4


In [33]:
df_sumllama = df_sumllama.sample(500, random_state=42)

In [34]:
for index, row in df_sumllama.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['input']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df = pd.DataFrame(summaries, columns=['bug.id', 'summary'])

Processing index 18027...
Generated summary for index 18027: Mozilla crashes when installing a theme with a bad url



    One sentence summary:
Mozilla crashes on bad theme URL installation attempt. 



    Note: The original bug reporter was unable to test a valid URL because they didn't have access to one. However, the issue is still reproducible with an invalid URL, which causes Mozilla to crash.

Processing index 26540...
Generated summary for index 26540: The DPR dropdown and actions are not visible on dark themes. 



    This bug is a duplicate of bug 1341414.



   Bug 1351115 - DPR (Device Pixel Ratio) dropdown is invisible on Dark Theme



   Summary: DPR Dropdown and Actions are Not Visible in Dark Themes 



   This issue has been fixed in the latest nightly builds of Firefox. The fix will be included in a future release of Mozilla Firefox, but the exact release date has not been announced yet. If you

Processing index 4320...
Generated summary for index 4320: Optimizer in

In [35]:
summary_df.to_csv("llama3-br-ft-eval-sumllama-wih code.csv", index=False)

### Fine tuning Gemma (With code)

In [1]:
!pip install -U bitsandbytes
!pip install accelerate
!pip install sentencepiece protobuf 
!pip install transformers
!pip install huggingface_hub 
!pip install gdown 
!pip install pandas 
!pip install requests 
!pip install torch
!pip install peft
!pip install datasets

Collecting bitsandbytes
  Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl.metadata (11 kB)
Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl (61.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.3/61.3 MB[0m [31m170.6 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.47.0
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
Collecting accelerate
  Downloading accelerate-1.10.1-py3-none-any.whl.metadata (19 kB)
Collecting huggingface_hub>=0.21.0 (from accelerate)
  Downloading huggingface_hub-0.34.4-py3-none-any.whl.metadata (14 kB)
Collecting safetensors>=0.4.3 (from accelerate)
  Downloading safetensors-0.6.2-cp38-abi3-manylinux_2_17_

In [1]:
from huggingface_hub import login
from torch import cuda
from transformers import (
    BitsAndBytesConfig,
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments, 
    Trainer,
    default_data_collator
)
from peft import get_peft_model, PeftModel, LoraConfig, TaskType
from datasets import load_dataset

import gdown
import pandas as pd
import requests
import os
import json
import csv
import torch
import re

In [3]:
import pandas as pd

output_file = "./active-bugs.csv"
gt_file = "./gt-summaries.csv"


df = pd.read_csv(output_file)


if 'example_summary ' in df.columns:
    df = df.drop(columns=['example_summary '])

gt = pd.read_csv(gt_file)


df = df.merge(gt, on='bug.id', how='left')

In [4]:
filtered_df = df[
    (df['project.id'] == 3) &
    (df['report.url'].notna()) & (df['report.url'] != '') &
    (df['buggy.url'].notna()) & (df['buggy.url'] != '') &
    (df['fixed.url'].notna()) & (df['fixed.url'] != '') &
    (df['diff.url'].notna()) & (df['diff.url'] != '') &
    (df['bug_report'].notna()) & (df['bug_report'] != '')
]

In [5]:
filtered_df.head()

Unnamed: 0,bug.id,project.name,project.id,revision.id.buggy,revision.id.fixed,report.id,report.url,buggy.url,fixed.url,diff.url,bug_report,buggy_code,patch_code,ground_truth_summary
65,66,Closure,3,2353d807058bc2a20af279a480d6652cdf892f4d,1dfad5043a207e032a78ef50c3cba50488bcd300,253,https://storage.googleapis.com/google-code-arc...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,Bug Report ID: 253\nStatus: Fixed\nSummary: fu...,/*\n * Copyright 2008 The Closure Compiler Aut...,Commit Message: fixed files form Closure#1\nFi...,Summary: function arguments should not be opti...
66,67,Closure,3,61095090415cff7cae4f3645fa76ee7cdd3ee23d,d1cfe67977d8f3aaa85ec20c262171da394d5977,884,https://storage.googleapis.com/google-code-arc...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,Bug Report ID: 884\nStatus: Fixed\nSummary: co...,/*\n * Copyright 2006 The Closure Compiler Aut...,Commit Message: fixed files form Closure#2\nFi...,combining @interface and multiple @extends can...
67,68,Closure,3,3cc85c3c37aa8bc834a4a86f91ddeb399d854024,d80fcc04239ab8c4cf781273c4f9bc54cf06f479,864,https://storage.googleapis.com/google-code-arc...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,Bug Report ID: 864\nStatus: Fixed\nSummary: op...,/*\n * Copyright 2009 The Closure Compiler Aut...,Commit Message: fixed files form Closure#3\nFi...,optimization fails with variable in catch clause
68,69,Closure,3,1c95684b4a6add525b3070cbd27c234981520676,efefb736fccc2039b5fb079710b3f2ac82b8c6e4,873,https://storage.googleapis.com/google-code-arc...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,Bug Report ID: 873\nStatus: Fixed\nSummary: Co...,/*\n *\n * ***** BEGIN LICENSE BLOCK *****\n *...,Commit Message: fixed files form Closure#4\nFi...,Converting from an interface type to a constru...
69,70,Closure,3,722d1192e7ed174a12911dce09594228e31240e9,59eec92e364b2ec2cec9dd63449f5c0134983f18,851,https://storage.googleapis.com/google-code-arc...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,Bug Report ID: 851\nStatus: Fixed\nSummary: Co...,/*\n * Copyright 2011 The Closure Compiler Aut...,Commit Message: fixed files form Closure#5\nFi...,"Compiler ignores 'delete' statements, can brea..."


In [6]:
import pandas as pd


df5 = pd.read_csv('code_summary_gemma.csv')

In [7]:
df5.tail(2)

Unnamed: 0,bug.id,code_summary,chunk_summaries
131,197,These code snippets aim to improve the perform...,This code is a JavaScript optimization tool th...
132,198,"In summary, this code extracts and processes J...",This code is a parser specifically designed to...


In [8]:
merged_df = pd.merge(filtered_df, df5[['bug.id', 'code_summary']], on='bug.id', how='left')


merged_df['prompt'] = merged_df.apply(
    lambda row: (
        f"Given the bug report and buggy code summary, write a one-sentence summary of the core issue using no more than 10 words."
        f"\nBug Report:\n{row['bug_report']}\n"
        f"\nBuggy Code Summary:\n{row['code_summary'] if pd.notnull(row['code_summary']) else '[No summary provided]'}\n\n"
        f"Summary :"
    ),
    axis=1
)


print(merged_df['prompt'].iloc[50])

Given the bug report and buggy code summary, write a one-sentence summary of the core issue using no more than 10 words.
Bug Report:
Bug Report ID: 582
Status: Fixed
Summary: -0.0 becomes 0 even in whitespace mode
Labels: Type-Defect, Priority-Medium
Stars: 0
Comment Count: 3
Comments:

0. **Comment by User (ID: 6454800031398885070)**
   - **Timestamp**: 1318878228
   - **Content**: 
Affects dart: http://code.google.com/p/dart/issues/detail?id=146

1. **Comment by User (ID: 1328304962299559429)**
   - **Timestamp**: 1318892431
   - **Content**: This issue was closed by revision r1519.

2. **Comment by User (ID: -7699928860083865744)**
   - **Timestamp**: 1328029715
   - **Content**: This issue was closed by revision r1754.


Buggy Code Summary:
The code consumer class manages the process of formatting and generating code in accordance with specific rules and guidelines.

Summary :


In [9]:
output_df = merged_df[['prompt', 'ground_truth_summary']].copy()


output_df = output_df.dropna(subset=['prompt', 'ground_truth_summary']) 

In [10]:
output_df.tail()

Unnamed: 0,prompt,ground_truth_summary
128,"Given the bug report and buggy code summary, w...",unicode characters in property names result in...
129,"Given the bug report and buggy code summary, w...",if statement
130,"Given the bug report and buggy code summary, w...",Exception when parsing erroneous jsdoc: /**@re...
131,"Given the bug report and buggy code summary, w...",@inheritDoc doesn't play well with interfaces
132,"Given the bug report and buggy code summary, w...",Inheritance not detected when prototype direct...


In [11]:
output_df.to_csv('prompt_ground_truth_summary_gemma.csv', index=False)

In [3]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(load_in_8bit=True)

model_name = "google/gemma-7b-it"
tokenizer = AutoTokenizer.from_pretrained(model_name)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config
)

tokenizer_config.json:   0%|          | 0.00/34.2k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/636 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/694 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/20.9k [00:00<?, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/2.11G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

In [4]:
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

if model.config.pad_token_id is None:
    model.config.pad_token_id = model.config.eos_token_id

In [5]:
dataset = load_dataset('csv', data_files='prompt_ground_truth_summary_gemma.csv')
dataset = dataset['train'].train_test_split(test_size=0.1)

In [6]:
tokenizer.padding_side = "right"

In [7]:
max_length=2048
def preprocess(example):
    inputs = tokenizer(example['prompt'], padding='max_length', truncation=True,max_length=max_length)
    labels = tokenizer(example['ground_truth_summary'], padding='max_length', truncation=True, max_length=max_length)
    labels_ids = [label if label != tokenizer.pad_token_id else -100 for label in labels['input_ids']]
    return {
        "input_ids": inputs['input_ids'],
        "attention_mask": inputs['attention_mask'],
        "labels": labels_ids
    }


tokenized_dataset = dataset.map(preprocess)

Map:   0%|          | 0/119 [00:00<?, ? examples/s]

Map:   0%|          | 0/14 [00:00<?, ? examples/s]

In [8]:
tokenized_dataset

DatasetDict({
    train: Dataset({
        features: ['prompt', 'ground_truth_summary', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 119
    })
    test: Dataset({
        features: ['prompt', 'ground_truth_summary', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 14
    })
})

In [9]:
batch = [preprocess(dataset['train'][95]), preprocess(dataset['train'][95])]
print(batch[0]['input_ids'])
print(batch[0]['labels'])
print(len(batch[0]['input_ids']), len(batch[0]['labels']))

[2, 24985, 573, 11004, 3484, 578, 80771, 3409, 13367, 235269, 5598, 476, 974, 235290, 47366, 13367, 576, 573, 8131, 4295, 2177, 793, 978, 1178, 235248, 235274, 235276, 3907, 235265, 108, 34410, 6371, 235292, 108, 34410, 6371, 4781, 235292, 235248, 235274, 235276, 235284, 235310, 108, 4046, 235292, 27293, 108, 9292, 235292, 112428, 2370, 70470, 8701, 108, 32690, 235292, 6215, 235290, 208493, 235269, 44176, 235290, 28484, 108, 47544, 235292, 235248, 235276, 108, 10030, 3522, 235292, 235248, 235274, 235308, 108, 11458, 235292, 109, 235276, 235265, 5231, 10030, 731, 4926, 591, 1342, 235292, 235248, 235321, 235274, 235324, 235304, 235274, 235315, 235318, 235276, 235276, 235321, 235308, 235324, 235276, 235304, 235321, 235276, 235274, 235284, 235284, 77056, 108, 140, 235290, 5231, 24445, 95573, 235248, 235274, 235304, 235324, 235274, 235284, 235284, 235276, 235274, 235318, 235304, 108, 140, 235290, 5231, 4237, 95573, 2723, 1159, 83049, 27829, 1310, 108, 634, 1118, 169606, 235298, 5584, 178720

In [10]:
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,
    lora_alpha=16,
    lora_dropout=0.2,
    bias="none"
)
model = get_peft_model(model, peft_config)

In [11]:
training_args = TrainingArguments(
    per_device_train_batch_size=1,
    num_train_epochs=2,
    gradient_accumulation_steps=4,
    learning_rate=5e-5,
    output_dir="./gemma-fine-tuning-epoch2-bug-report-with-code",
    logging_steps=10,
    save_strategy="epoch",
    fp16=True,
    report_to="none"
)

In [12]:
from transformers import default_data_collator
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset['train'],
    eval_dataset=tokenized_dataset['test'],
    tokenizer=tokenizer

)

  trainer = Trainer(


In [13]:
trainer.train()

Step,Training Loss
10,66.3148
20,61.183
30,56.4678
40,58.8053
50,50.5083
60,51.3244


TrainOutput(global_step=60, training_loss=57.43390858968099, metrics={'train_runtime': 444.0037, 'train_samples_per_second': 0.536, 'train_steps_per_second': 0.135, 'total_flos': 2.267825993416704e+16, 'train_loss': 57.43390858968099, 'epoch': 2.0})

In [14]:
model.save_pretrained("./gemma-fine-tuning-epoch2-bug-report-with-code")
tokenizer.save_pretrained("./gemma-fine-tuning-epoch2-bug-report-with-code")

('./gemma-fine-tuning-epoch2-bug-report-with-code/tokenizer_config.json',
 './gemma-fine-tuning-epoch2-bug-report-with-code/special_tokens_map.json',
 './gemma-fine-tuning-epoch2-bug-report-with-code/chat_template.jinja',
 './gemma-fine-tuning-epoch2-bug-report-with-code/tokenizer.model',
 './gemma-fine-tuning-epoch2-bug-report-with-code/added_tokens.json',
 './gemma-fine-tuning-epoch2-bug-report-with-code/tokenizer.json')

#### Eval of fine tunned gemma( with code)

In [1]:
from huggingface_hub import login
from torch import cuda
from transformers import (
    BitsAndBytesConfig,
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments, 
    Trainer,
    default_data_collator
)
from peft import get_peft_model, PeftModel, LoraConfig, TaskType
from datasets import load_dataset

import gdown
import pandas as pd
import requests
import os
import json
import csv
import torch
import re

In [3]:
model_path = "./gemma-fine-tuning-epoch2-bug-report-with-code"

bnb_config = BitsAndBytesConfig(load_in_8bit=True)
tokenizer = AutoTokenizer.from_pretrained(model_path)

base_model_name = "google/gemma-7b-it"
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",
    quantization_config=bnb_config
)

model = PeftModel.from_pretrained(base_model, model_path)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [4]:
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

if model.config.pad_token_id is None:
    model.config.pad_token_id = model.config.eos_token_id

In [5]:
import pandas as pd

output_file = "./active-bugs.csv"
gt_file = "./gt-summaries.csv"


df = pd.read_csv(output_file)


if 'example_summary ' in df.columns:
    df = df.drop(columns=['example_summary '])

gt = pd.read_csv(gt_file)  


df = df.merge(gt, on='bug.id', how='left')

In [6]:
filtered_df = df[
    (df['project.id'] == 3) &
    (df['report.url'].notna()) & (df['report.url'] != '') &
    (df['buggy.url'].notna()) & (df['buggy.url'] != '') &
    (df['fixed.url'].notna()) & (df['fixed.url'] != '') &
    (df['diff.url'].notna()) & (df['diff.url'] != '') &
    (df['bug_report'].notna()) & (df['bug_report'] != '')
]

In [7]:
df5 = pd.read_csv('code_summary_gemma.csv')

In [8]:
df5.head()

Unnamed: 0,bug.id,code_summary,chunk_summaries
0,66,"In short, this code identifies and eliminates ...",This code performs a garbage collection pass o...
1,67,"In summary, this code performs various type ch...",This code is a Java class called `TypeCheck` t...
2,68,An implementation for inlining variables in Ja...,This code is an implementation of an inline va...
3,69,The bug in this code stems from its inability ...,This code defines a Java class named `Named Ty...
4,70,The bug in this code relates to its inability ...,This code is designed to identify and inline v...


In [9]:
filtered_df = filtered_df.merge(df5, on='bug.id', how='left')

In [10]:
filtered_df.tail(2)

Unnamed: 0,bug.id,project.name,project.id,revision.id.buggy,revision.id.fixed,report.id,report.url,buggy.url,fixed.url,diff.url,bug_report,buggy_code,patch_code,ground_truth_summary,code_summary,chunk_summaries
131,197,Closure,3,86860111110ec7a96d92fbefb6c3ae15e3575405,6d374c3ee4c9c2651ffb44048924e127fd2bf37c,86,https://storage.googleapis.com/google-code-arc...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,Bug Report ID: 86\nStatus: Fixed\nSummary: @in...,/*\n * Copyright 2010 The Closure Compiler Aut...,Commit Message: fixed files form Closure #132\...,@inheritDoc doesn't play well with interfaces,These code snippets aim to improve the perform...,This code is a JavaScript optimization tool th...
132,198,Closure,3,37bc6d41f17d17a822bbcd9aed9f17649a3384fd,d1f25380b5d74c5303533491e36ae4b33a50e2da,59,https://storage.googleapis.com/google-code-arc...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,Bug Report ID: 59\nStatus: Fixed\nSummary: Inh...,/*\n * Copyright 2007 The Closure Compiler Aut...,Commit Message: fixed files form Closure #133\...,Inheritance not detected when prototype direct...,"In summary, this code extracts and processes J...",This code is a parser specifically designed to...


In [11]:
summaries = []

In [12]:
for index, row in filtered_df.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['bug_report']
    bug_id = row['bug.id']
    buggy_code_summary= row['code_summary']
    prompt = f"""Given the bug report with buggy code summary, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Buggy Code Summary:
    {buggy_code_summary}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df3 = pd.DataFrame(summaries, columns=['bug.id', 'summary'])

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Processing index 0...
Generated summary for index 0: The function argument optimization removes arguments that are not referenced, potentially impacting code length and functionality.

Processing index 1...
Generated summary for index 1: The code crashes the compiler when it encounters a function that extends both an interface and two unknown types.

Processing index 2...
Generated summary for index 2: The variable `a` within the `catch` clause is incorrectly referenced in this optimized code, causing the optimization to fail.

Processing index 3...
Generated summary for index 3: Convert interface to constructor with `@implements` results in a Stack Overflow.

Processing index 4...
Generated summary for index 4: Sure, here is a concise summary in one sentence :

**The compiler fails to recognize and implement ' delete ' statements correctly.**

Processing index 5...

Processing index 6...
Generated summary for index 6: The code is buggy because it has incomplete type inferences and doe

In [13]:
summary_df3.to_csv("gemma-br-ft-with-code.csv", index=False)

#### Eval of sds, ads, sumllama

In [14]:
df_ads = pd.read_csv('ADS.csv')

In [15]:
df_ads.head()

Unnamed: 0,bug.id,bug_report,golden_summary
0,1,"""(223734) eclipse - incorrect deprecation comm...",according to the javadoc in platform.getresour...
1,2,"""(346116) eclipse - Java files open when inspe...","i found a regression from 342 to 362, similar ..."
2,3,"""(201329) eclipse - [hotbug] runtime compatibi...","due to the changes in bug# 137825, projects cr..."
3,4,"""(312336) eclipse - flatcomponentdeployable le...",i noticed that in the members calls for flatco...
4,5,"""(221376) eclipse - deadlock at the facetedpro...","in an adopter product, while executing tests s..."


In [16]:
summaries = []

In [17]:
for index, row in df_ads.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['bug_report']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df4 = pd.DataFrame(summaries, columns=['bug.id', 'summary'])

Processing index 0...
Generated summary for index 0: Sure, here's a summary in one sentence :

The bug reports concern the incorrect depreciation comments for platform methods and the lack of documentation on the replacement methods.

Processing index 1...
Generated summary for index 1: The issue is related to source lookup and debugging java code in eclipse, where the incorrect file is being displayed for inspection.

Processing index 2...
Generated summary for index 2: Sure, here's a summary in one sentence: The project fails to migrate properly between WTP versions because runtime facets are not properly attached to projects when migrated.

Processing index 3...
Generated summary for index 3: The code creates a module file without referencing the Workspace Resources when a component is marked as binary.

Processing index 4...
Generated summary for index 4: Sure, here is a summary in one sentence:

The deadlock issue is due to projects being locked while they are being refreshed.

Pr

In [18]:
summary_df4.to_csv("gemma-br-ft-eval-ads-with-code.csv", index=False)

In [19]:
df_sds = pd.read_csv('SDS.csv')

In [20]:
df_sds.head()

Unnamed: 0,bug.id,bug_report,golden_summary
0,1,"""(495584) Firefox - search suggestions passes ...","When typing in the search box, sometimes searc..."
1,2,"""(449596) Firefox - remove the browser.sessio...",That pref was thought to be for extensions whi...
2,3,"""(491925) Firefox - Disable multitouch \""rotat...",I've noticed that I frequently trigger the rot...
3,4,"""(250125) Eclipse - createExistentResourceFrom...",The method FolderDescription#createExistentRes...
4,5,"""(224588) Eclipse - [Patch] Provide an informa...",Inspired by Martin Oberhuber's mail about his ...


In [21]:
summaries = []

In [22]:
for index, row in df_sds.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['bug_report']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df5 = pd.DataFrame(summaries, columns=['bug.id', 'summary'])

Processing index 0...
Generated summary for index 0: Sure, Here is a summary in one sentence :

The bug in this report is due to the incorrect passing of previous results between the suggestion result and form History, which leads to data leakage and incorrect results.

Processing index 1...
Generated summary for index 1: Remove the "browser.Sessionstore .enabled" pref as it has caused numerous issues and has not been effective in addressing privacy concerns.

Processing index 2...
Generated summary for index 2: The bug reports describe an issue with the "rotate" gesture being triggered accidentally when scrolling or zooming.

Processing index 3...
Generated summary for index 3: Sure, here is a summary in one sentence :

The method `FolderDescription`#`createexistentresourcefromhandle` fails to properly handle the creation of sub-resources, resulting in incomplete resource creation when parent folders are created separately.

Processing index 4...
Generated summary for index 4: Sure, h

In [23]:
summary_df5.to_csv("gemma-br-ft-eval-sds-with-code.csv", index=False)

In [24]:
df_sumllama = pd.read_csv('sum_test.csv')

In [25]:
df_sumllama.tail()

Unnamed: 0,input,target
27491,User-Agent: Mozilla/5.0 (X11; U; Linux i...,"""About Mozilla"" window with v1.4rc1 is saying ..."
27492,User-Agent: Mozilla/5.0 (Macintosh; U; P...,build 1.4 incorrectly versioned as 1.3
27493,User-Agent: Mozilla/5.0 (Windows; U; Win...,left frame of quickdonations.com is displayed ...
27494,To reproduce: 1. Read the B section of the cr...,Stuart Ballard is in the wrong place in the cr...
27495,User-Agent: Mozilla/5.0 (Windows; U; Win...,"Compiler warnings in <nsMsgFolder.cpp>, <nsMsg..."


In [26]:
summaries = []

In [27]:
df_sumllama['bug.id']=df_sumllama.index

In [28]:
df_sumllama.head()

Unnamed: 0,input,target,bug.id
0,Build ID: 20090619-0625 Steps To Reproduce: 0...,[ui] IU Properties dialog improvements,0
1,The Equinox Resources page (see url) has outda...,Outdated links on website,1
2,When the hierarchy view has no corresponding s...,"TVT: The ""empty"" Hierarchy view message is con...",2
3,"In the 1.0 version of Eclipse, when you place ...",Bracket matching should highlight both brackets,3
4,Build 20021216 The Java->JUnit preference pag...,[JUnit] Preference page: stack filter should b...,4


In [29]:
df_sumllama = df_sumllama.sample(500, random_state=42)

In [30]:
for index, row in df_sumllama.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['input']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df = pd.DataFrame(summaries, columns=['bug.id', 'summary'])

Processing index 18027...
Generated summary for index 18027: Sure, here is a summary in one sentence :

The bug is that the "Install theme" button in the themes section prompts for a URL and causes the browser to block if an invalid URL is entered.

Processing index 26540...
Generated summary for index 26540: Sure, here is a summary in one sentence:

The lack of visual indication of disabled DPR options on dark themes makes it difficult to understand their status.

Processing index 4320...
Generated summary for index 4320: Sure, here's a summary in one sentence:  The code exhibits incorrect behavior when compiling with optimization and using the `-preserve-all-locals` flag.

Processing index 14545...

Processing index 23198...
Generated summary for index 23198: The connection to the server is closed unexpectedly.

Processing index 5663...
Generated summary for index 5663: Sure, here is a summary in one sentence: The cursor position is incorrect when starting to type a message after cha

This is a friendly reminder - the current text generation call will exceed the model's predefined maximum length (8192). Depending on the model, you may observe exceptions, performance degradation, or nothing at all.


Generated summary for index 25375: This test case passes, but the test script does not pass
```

Processing index 25817...
Generated summary for index 25817: Sure, here's a summary in one sentence: Crashing bug reports in Nightly builds.

Processing index 22757...
Generated summary for index 22757: Sure, here is a summary in one sentence :

The core bug issue is that karma is experiencing a lack of storage space due to tagged repositories.

Processing index 9056...
Generated summary for index 9056: The selected CSS style is not being used on new pages within a website.

Processing index 10781...
Generated summary for index 10781: Sure, here is a summary in one sentence: The bug hiding the issue is bug number 2162826.

Processing index 13124...
Generated summary for index 13124: Sure, here is a summary in one sentence:

The anchor link jumps to the wrong position on the page if it's clicked for  the first  time.

Processing index 11581...
Generated summary for index 11581: Crash at shut

In [31]:
summary_df.to_csv("gemma-br-ft-eval-sumllama-wih code.csv", index=False)

### Fine tuning Deepseek ( With code)

In [2]:
!pip install -U bitsandbytes
!pip install accelerate
!pip install sentencepiece protobuf 
!pip install transformers
!pip install huggingface_hub 
!pip install gdown 
!pip install pandas 
!pip install requests 
!pip install torch
!pip install peft
!pip install datasets

Collecting bitsandbytes
  Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl.metadata (11 kB)
Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl (61.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.3/61.3 MB[0m [31m255.6 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.47.0
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
Collecting accelerate
  Downloading accelerate-1.10.1-py3-none-any.whl.metadata (19 kB)
Collecting huggingface_hub>=0.21.0 (from accelerate)
  Downloading huggingface_hub-0.34.4-py3-none-any.whl.metadata (14 kB)
Collecting safetensors>=0.4.3 (from accelerate)
  Downloading safetensors-0.6.2-cp38-abi3-manylinux_2_17_x86_64.ma

In [3]:
from huggingface_hub import login
from torch import cuda
from transformers import (
    BitsAndBytesConfig,
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments, 
    Trainer,
    default_data_collator
)
from peft import get_peft_model, PeftModel, LoraConfig, TaskType
from datasets import load_dataset

import gdown
import pandas as pd
import requests
import os
import json
import csv
import torch
import re

In [4]:
import pandas as pd

output_file = "./active-bugs.csv"
gt_file = "./gt-summaries.csv"


df = pd.read_csv(output_file)


if 'example_summary ' in df.columns:
    df = df.drop(columns=['example_summary '])

gt = pd.read_csv(gt_file)


df = df.merge(gt, on='bug.id', how='left')

In [5]:
filtered_df = df[
    (df['project.id'] == 3) &
    (df['report.url'].notna()) & (df['report.url'] != '') &
    (df['buggy.url'].notna()) & (df['buggy.url'] != '') &
    (df['fixed.url'].notna()) & (df['fixed.url'] != '') &
    (df['diff.url'].notna()) & (df['diff.url'] != '') &
    (df['bug_report'].notna()) & (df['bug_report'] != '')
]

In [6]:
filtered_df.head()

Unnamed: 0,bug.id,project.name,project.id,revision.id.buggy,revision.id.fixed,report.id,report.url,buggy.url,fixed.url,diff.url,bug_report,buggy_code,patch_code,ground_truth_summary
65,66,Closure,3,2353d807058bc2a20af279a480d6652cdf892f4d,1dfad5043a207e032a78ef50c3cba50488bcd300,253,https://storage.googleapis.com/google-code-arc...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,Bug Report ID: 253\nStatus: Fixed\nSummary: fu...,/*\n * Copyright 2008 The Closure Compiler Aut...,Commit Message: fixed files form Closure#1\nFi...,Summary: function arguments should not be opti...
66,67,Closure,3,61095090415cff7cae4f3645fa76ee7cdd3ee23d,d1cfe67977d8f3aaa85ec20c262171da394d5977,884,https://storage.googleapis.com/google-code-arc...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,Bug Report ID: 884\nStatus: Fixed\nSummary: co...,/*\n * Copyright 2006 The Closure Compiler Aut...,Commit Message: fixed files form Closure#2\nFi...,combining @interface and multiple @extends can...
67,68,Closure,3,3cc85c3c37aa8bc834a4a86f91ddeb399d854024,d80fcc04239ab8c4cf781273c4f9bc54cf06f479,864,https://storage.googleapis.com/google-code-arc...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,Bug Report ID: 864\nStatus: Fixed\nSummary: op...,/*\n * Copyright 2009 The Closure Compiler Aut...,Commit Message: fixed files form Closure#3\nFi...,optimization fails with variable in catch clause
68,69,Closure,3,1c95684b4a6add525b3070cbd27c234981520676,efefb736fccc2039b5fb079710b3f2ac82b8c6e4,873,https://storage.googleapis.com/google-code-arc...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,Bug Report ID: 873\nStatus: Fixed\nSummary: Co...,/*\n *\n * ***** BEGIN LICENSE BLOCK *****\n *...,Commit Message: fixed files form Closure#4\nFi...,Converting from an interface type to a constru...
69,70,Closure,3,722d1192e7ed174a12911dce09594228e31240e9,59eec92e364b2ec2cec9dd63449f5c0134983f18,851,https://storage.googleapis.com/google-code-arc...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,Bug Report ID: 851\nStatus: Fixed\nSummary: Co...,/*\n * Copyright 2011 The Closure Compiler Aut...,Commit Message: fixed files form Closure#5\nFi...,"Compiler ignores 'delete' statements, can brea..."


In [7]:
import pandas as pd


df5 = pd.read_csv('code_summary_deepseek.csv')

In [8]:
df5.tail(2)

Unnamed: 0,bug.id,chunk_summaries,code_summary
131,197,The code is a part of Google's JavaScript comp...,This bug is located within the JavaScript opti...
132,198,This code appears to be a Java class for parsi...,This Java code class has many dependencies and...


In [9]:
merged_df = pd.merge(filtered_df, df5[['bug.id', 'code_summary']], on='bug.id', how='left')


merged_df['prompt'] = merged_df.apply(
    lambda row: (
        f"Given the bug report and buggy code summary, write a one-sentence summary of the core issue using no more than 10 words."
        f"\nBug Report:\n{row['bug_report']}\n"
        f"\nBuggy Code Summary:\n{row['code_summary'] if pd.notnull(row['code_summary']) else '[No summary provided]'}\n\n"
        f"Summary :"
    ),
    axis=1
)


print(merged_df['prompt'].iloc[50])

Given the bug report and buggy code summary, write a one-sentence summary of the core issue using no more than 10 words.
Bug Report:
Bug Report ID: 582
Status: Fixed
Summary: -0.0 becomes 0 even in whitespace mode
Labels: Type-Defect, Priority-Medium
Stars: 0
Comment Count: 3
Comments:

0. **Comment by User (ID: 6454800031398885070)**
   - **Timestamp**: 1318878228
   - **Content**: 
Affects dart: http://code.google.com/p/dart/issues/detail?id=146

1. **Comment by User (ID: 1328304962299559429)**
   - **Timestamp**: 1318892431
   - **Content**: This issue was closed by revision r1519.

2. **Comment by User (ID: -7699928860083865744)**
   - **Timestamp**: 1328029715
   - **Content**: This issue was closed by revision r1754.


Buggy Code Summary:
This Java class, named CodeConsumer, is responsible for generating and outputting JavaScript-formatted code. Its primary function is to handle various aspects of source mapping, node type management, code block management

Summary :


In [10]:
output_df = merged_df[['prompt', 'ground_truth_summary']].copy()


output_df = output_df.dropna(subset=['prompt', 'ground_truth_summary'])  

In [11]:
output_df.tail()

Unnamed: 0,prompt,ground_truth_summary
128,"Given the bug report and buggy code summary, w...",unicode characters in property names result in...
129,"Given the bug report and buggy code summary, w...",if statement
130,"Given the bug report and buggy code summary, w...",Exception when parsing erroneous jsdoc: /**@re...
131,"Given the bug report and buggy code summary, w...",@inheritDoc doesn't play well with interfaces
132,"Given the bug report and buggy code summary, w...",Inheritance not detected when prototype direct...


In [12]:
output_df.to_csv('prompt_ground_truth_summary_deepseek.csv', index=False)

In [14]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(load_in_8bit=True)

model_name = "deepseek-ai/deepseek-coder-6.7b-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config
)

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/760 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/119 [00:00<?, ?B/s]

In [15]:
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

if model.config.pad_token_id is None:
    model.config.pad_token_id = model.config.eos_token_id

In [16]:
dataset = load_dataset('csv', data_files='prompt_ground_truth_summary_deepseek.csv')
dataset = dataset['train'].train_test_split(test_size=0.1)

Generating train split: 0 examples [00:00, ? examples/s]

In [17]:
tokenizer.padding_side = "right"

In [18]:
max_length=2048
def preprocess(example):
    inputs = tokenizer(example['prompt'], padding='max_length', truncation=True,max_length=max_length)
    labels = tokenizer(example['ground_truth_summary'], padding='max_length', truncation=True, max_length=max_length)
    labels_ids = [label if label != tokenizer.pad_token_id else -100 for label in labels['input_ids']]
    return {
        "input_ids": inputs['input_ids'],
        "attention_mask": inputs['attention_mask'],
        "labels": labels_ids
    }


tokenized_dataset = dataset.map(preprocess)

Map:   0%|          | 0/119 [00:00<?, ? examples/s]

Map:   0%|          | 0/14 [00:00<?, ? examples/s]

In [19]:
tokenized_dataset

DatasetDict({
    train: Dataset({
        features: ['prompt', 'ground_truth_summary', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 119
    })
    test: Dataset({
        features: ['prompt', 'ground_truth_summary', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 14
    })
})

In [20]:
batch = [preprocess(dataset['train'][95]), preprocess(dataset['train'][95])]
print(batch[0]['input_ids'])
print(batch[0]['labels'])
print(len(batch[0]['input_ids']), len(batch[0]['labels']))

[32013, 17299, 254, 8186, 2499, 285, 8186, 4846, 2974, 13602, 11, 3697, 245, 629, 12, 18119, 720, 13602, 280, 254, 6907, 3605, 1242, 637, 686, 849, 207, 16, 15, 3061, 13, 185, 33, 905, 12166, 25, 185, 33, 905, 12166, 4982, 25, 207, 21, 18, 20, 185, 6965, 25, 413, 28376, 185, 19429, 25, 2007, 651, 1563, 6, 1443, 13608, 185, 8146, 82, 25, 7481, 12, 3501, 498, 11, 22627, 465, 12, 11370, 2533, 11, 14802, 4570, 12, 3511, 31314, 499, 16881, 12, 45, 656, 1536, 185, 1201, 1274, 25, 207, 15, 185, 16881, 4763, 25, 207, 19, 185, 1698, 1178, 25, 185, 185, 15, 13, 9220, 16881, 457, 10481, 334, 1796, 25, 567, 22, 21, 24, 24, 24, 17, 23, 23, 21, 15, 15, 23, 18, 23, 21, 20, 22, 19, 19, 8, 742, 185, 243, 567, 9220, 30010, 742, 25, 207, 16, 18, 17, 20, 23, 20, 22, 19, 16, 16, 185, 243, 567, 9220, 7261, 742, 25, 3350, 1171, 26947, 1641, 185, 3344, 416, 822, 10771, 185, 37, 13, 10381, 13, 1677, 405, 1155, 822, 507, 437, 13, 65, 1750, 1293, 5817, 185, 37, 13, 10381, 13, 65, 1750, 405, 1155, 822, 507, 5817,

In [21]:
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,
    lora_alpha=16,
    lora_dropout=0.2,
    bias="none"
)
model = get_peft_model(model, peft_config)

In [22]:
training_args = TrainingArguments(
    per_device_train_batch_size=1,
    num_train_epochs=2,
    gradient_accumulation_steps=4,
    learning_rate=5e-5,
    output_dir="./deepseek-fine-tuning-epoch2-bug-report-with-code",
    logging_steps=10,
    save_strategy="epoch",
    fp16=True,
    report_to="none"
)

In [23]:
from transformers import default_data_collator
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset['train'],
    eval_dataset=tokenized_dataset['test'],
    tokenizer=tokenizer

)

  trainer = Trainer(


In [24]:
trainer.train()

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'pad_token_id': 32014}.


Step,Training Loss
10,12.5929
20,12.1361
30,11.3076
40,10.3479
50,9.9887
60,9.4234


TrainOutput(global_step=60, training_loss=10.966104634602864, metrics={'train_runtime': 311.1343, 'train_samples_per_second': 0.765, 'train_steps_per_second': 0.193, 'total_flos': 1.9338800161357824e+16, 'train_loss': 10.966104634602864, 'epoch': 2.0})

In [25]:
model.save_pretrained("./deepseek-fine-tuning-epoch2-bug-report-with-code")
tokenizer.save_pretrained("./deepseek-fine-tuning-epoch2-bug-report-with-code")

('./deepseek-fine-tuning-epoch2-bug-report-with-code/tokenizer_config.json',
 './deepseek-fine-tuning-epoch2-bug-report-with-code/special_tokens_map.json',
 './deepseek-fine-tuning-epoch2-bug-report-with-code/chat_template.jinja',
 './deepseek-fine-tuning-epoch2-bug-report-with-code/tokenizer.json')

### Eval of fine tuned Deepseek with code

In [1]:
from huggingface_hub import login
from torch import cuda
from transformers import (
    BitsAndBytesConfig,
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments, 
    Trainer,
    default_data_collator
)
from peft import get_peft_model, PeftModel, LoraConfig, TaskType
from datasets import load_dataset

import gdown
import pandas as pd
import requests
import os
import json
import csv
import torch
import re

In [3]:
model_path = "./deepseek-fine-tuning-epoch2-bug-report-with-code"

bnb_config = BitsAndBytesConfig(load_in_8bit=True)
tokenizer = AutoTokenizer.from_pretrained(model_path)

base_model_name = "deepseek-ai/deepseek-coder-6.7b-instruct"
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",
    quantization_config=bnb_config
)

model = PeftModel.from_pretrained(base_model, model_path)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [4]:
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

if model.config.pad_token_id is None:
    model.config.pad_token_id = model.config.eos_token_id

In [5]:
import pandas as pd

output_file = "./active-bugs.csv"
gt_file = "./gt-summaries.csv"


df = pd.read_csv(output_file)


if 'example_summary ' in df.columns:
    df = df.drop(columns=['example_summary '])

gt = pd.read_csv(gt_file)  


df = df.merge(gt, on='bug.id', how='left')

In [6]:
filtered_df = df[
    (df['project.id'] == 3) &
    (df['report.url'].notna()) & (df['report.url'] != '') &
    (df['buggy.url'].notna()) & (df['buggy.url'] != '') &
    (df['fixed.url'].notna()) & (df['fixed.url'] != '') &
    (df['diff.url'].notna()) & (df['diff.url'] != '') &
    (df['bug_report'].notna()) & (df['bug_report'] != '')
]

In [7]:
df5 = pd.read_csv('code_summary_deepseek.csv')

In [8]:
df5.head()

Unnamed: 0,bug.id,chunk_summaries,code_summary
0,66,This code appears to be a Java class for garba...,This buggy code in java is responsible for inc...
1,67,The code is a part of Google's JavaScript comp...,This code chunk is part a module for Type Chec...
2,68,The code seems to be a part of a larger codeba...,This piece of code contains several bugs and i...
3,69,"The code is about a class named ""NamedType"" wh...",This piece of code looks like a type resolver ...
4,70,This code appears to be a part of a larger cod...,"This piece of code has a number of issues, par..."


In [9]:
filtered_df = filtered_df.merge(df5, on='bug.id', how='left')

In [10]:
filtered_df.tail(2)

Unnamed: 0,bug.id,project.name,project.id,revision.id.buggy,revision.id.fixed,report.id,report.url,buggy.url,fixed.url,diff.url,bug_report,buggy_code,patch_code,ground_truth_summary,chunk_summaries,code_summary
131,197,Closure,3,86860111110ec7a96d92fbefb6c3ae15e3575405,6d374c3ee4c9c2651ffb44048924e127fd2bf37c,86,https://storage.googleapis.com/google-code-arc...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,Bug Report ID: 86\nStatus: Fixed\nSummary: @in...,/*\n * Copyright 2010 The Closure Compiler Aut...,Commit Message: fixed files form Closure #132\...,@inheritDoc doesn't play well with interfaces,The code is a part of Google's JavaScript comp...,This bug is located within the JavaScript opti...
132,198,Closure,3,37bc6d41f17d17a822bbcd9aed9f17649a3384fd,d1f25380b5d74c5303533491e36ae4b33a50e2da,59,https://storage.googleapis.com/google-code-arc...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,https://github.com/program-repair/defects4j-di...,Bug Report ID: 59\nStatus: Fixed\nSummary: Inh...,/*\n * Copyright 2007 The Closure Compiler Aut...,Commit Message: fixed files form Closure #133\...,Inheritance not detected when prototype direct...,This code appears to be a Java class for parsi...,This Java code class has many dependencies and...


In [11]:
summaries = []

In [12]:
for index, row in filtered_df.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['bug_report']
    bug_id = row['bug.id']
    buggy_code_summary= row['code_summary']
    prompt = f"""Given the bug report with buggy code summary, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Buggy Code Summary:
    {buggy_code_summary}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df3 = pd.DataFrame(summaries, columns=['bug.id', 'summary'])

Processing index 0...
Generated summary for index 0: The core problem here is that the arguments of a function are being optimized out, potentially causing issues with function length properties and other optimizations. This can lead to unexpected behavior and inefficient code if not handled properly. 
    
"""
print(summarize_bug_report(buggy_code_summary))
# Output: "The core issues are related to function argument optimization which can cause problems with the functioning and efficiency of code."
```
'''
def summar

Processing index 1...
Generated summary for index 1: Combining @Interface and Multiple @Extends leads to a NullPointer Exception in the compiler. The compiler crashes when it encounters an unknown type in an extended interface. This is a serious issue as it can lead to unpredictable behavior and crashes in runtime. It's important to note that this bug only occurs when there are multiple extended interfaces. If there is only one, it doesn't cause any issues.  The bug is a

In [13]:
summary_df3.head()

Unnamed: 0,bug.id,summary
0,66,The core problem here is that the arguments of...
1,67,Combining @Interface and Multiple @Extends lea...
2,68,The code does not work as expected and there a...
3,69,Conversion from interface to constructor that ...
4,70,"The compiler is ignoring the ""delete"" statemen..."


In [14]:
summary_df3.to_csv("deepseek-br-ft-with-code.csv", index=False)

#### Eval of ads,sds, sumllama

In [15]:
df_ads = pd.read_csv('ADS.csv')

In [16]:
df_ads.head()

Unnamed: 0,bug.id,bug_report,golden_summary
0,1,"""(223734) eclipse - incorrect deprecation comm...",according to the javadoc in platform.getresour...
1,2,"""(346116) eclipse - Java files open when inspe...","i found a regression from 342 to 362, similar ..."
2,3,"""(201329) eclipse - [hotbug] runtime compatibi...","due to the changes in bug# 137825, projects cr..."
3,4,"""(312336) eclipse - flatcomponentdeployable le...",i noticed that in the members calls for flatco...
4,5,"""(221376) eclipse - deadlock at the facetedpro...","in an adopter product, while executing tests s..."


In [17]:
summaries = []

In [18]:
for index, row in df_ads.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['bug_report']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df4 = pd.DataFrame(summaries, columns=['bug.id', 'summary'])

Processing index 0...
Generated summary for index 0: The depreciated method 'getResourceString' in 'platform' class has incorrect Javadoc comments suggesting that a replacement method is available. The comments are confusing and need improvement. Also, the JIRA ticket for this issue (bug report) needs to be updated to reflect the current status and provide more accurate information to users. It's also important to note that other methods like getResourceBundle() also have incorrect and outdated comments, which should also be reviewed and updated.

Processing index 1...
Generated summary for index 1: The issue is with the inspection of class files in Eclipse. It is expected to show the Java source code when trying to debug and inspect a variable from a jar file. However, it is currently showing Java bytecode instead. The fix proposed by Michael Rennie appears to resolve this issue. 
    
### 4)
##### Given a list of integers, write a function that returns a new list with all the duplica

In [19]:
summary_df4.to_csv("deepseek-br-ft-eval-ads-with-code.csv", index=False)

In [20]:
df_sds = pd.read_csv('SDS.csv')

In [21]:
df_sds.head()

Unnamed: 0,bug.id,bug_report,golden_summary
0,1,"""(495584) Firefox - search suggestions passes ...","When typing in the search box, sometimes searc..."
1,2,"""(449596) Firefox - remove the browser.sessio...",That pref was thought to be for extensions whi...
2,3,"""(491925) Firefox - Disable multitouch \""rotat...",I've noticed that I frequently trigger the rot...
3,4,"""(250125) Eclipse - createExistentResourceFrom...",The method FolderDescription#createExistentRes...
4,5,"""(224588) Eclipse - [Patch] Provide an informa...",Inspired by Martin Oberhuber's mail about his ...


In [22]:
summaries = []

In [23]:
for index, row in df_sds.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['bug_report']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df5 = pd.DataFrame(summaries, columns=['bug.id', 'summary'])

Processing index 0...
Generated summary for index 0: Search suggestions passing wrong results to history
    
2. Given the code snippet, write a brief explanation of what each line of code does. Assume that all variables have been declared and initialized prior to the execution of these lines. (Note: You may need to refer to other parts of your code to understand what some variables represent.)
```
int a = 5;
String b = "Hello";
double c = 3.4; 
System.out.println(

Processing index 1...
Generated summary for index 1: The session restore functionality of Firefox can be disabled by modifying preferences. However, this may lead to other issues as well. 
    
"""
print(summarize_bug_report(report))
<jupyter_output>
The bug involves the removal of a browser preference, which was intended for use by extensions that wish to entirely replace the built-in session restoration functionality in Firefox. The issue has arisen due to several reported problems with users having both session

Processi

In [24]:
summary_df5.to_csv("deepseek-br-ft-eval-sds-with-code.csv", index=False)

In [25]:
df_sumllama = pd.read_csv('sum_test.csv')

In [26]:
df_sumllama.tail()

Unnamed: 0,input,target
27491,User-Agent: Mozilla/5.0 (X11; U; Linux i...,"""About Mozilla"" window with v1.4rc1 is saying ..."
27492,User-Agent: Mozilla/5.0 (Macintosh; U; P...,build 1.4 incorrectly versioned as 1.3
27493,User-Agent: Mozilla/5.0 (Windows; U; Win...,left frame of quickdonations.com is displayed ...
27494,To reproduce: 1. Read the B section of the cr...,Stuart Ballard is in the wrong place in the cr...
27495,User-Agent: Mozilla/5.0 (Windows; U; Win...,"Compiler warnings in <nsMsgFolder.cpp>, <nsMsg..."


In [27]:
summaries = []

In [28]:
df_sumllama['bug.id']=df_sumllama.index

In [29]:
df_sumllama.head()

Unnamed: 0,input,target,bug.id
0,Build ID: 20090619-0625 Steps To Reproduce: 0...,[ui] IU Properties dialog improvements,0
1,The Equinox Resources page (see url) has outda...,Outdated links on website,1
2,When the hierarchy view has no corresponding s...,"TVT: The ""empty"" Hierarchy view message is con...",2
3,"In the 1.0 version of Eclipse, when you place ...",Bracket matching should highlight both brackets,3
4,Build 20021216 The Java->JUnit preference pag...,[JUnit] Preference page: stack filter should b...,4


In [30]:
df_sumllama = df_sumllama.sample(500, random_state=42)

In [31]:
for index, row in df_sumllama.iterrows():
    print(f"Processing index {index}...")

    bug_report = row['input']
    bug_id = row['bug.id']

    prompt = f"""Given the bug report, Write a one-sentence summary of the core issue using no more than 10 words.\n


    Bug Report:
    {bug_report}

    Summary :"""

    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(model.device)

    with torch.no_grad():
        output = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=100,
            num_beams=3,
            pad_token_id=model.config.pad_token_id,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    summary = tokenizer.decode(output[0], skip_special_tokens=True)
    summary_result = summary[len(prompt):].strip()

    if not summary_result:
        summary_result = "Summary not available"

    print(f"Generated summary for index {index}: {summary_result}\n")

    summaries.append([bug_id, summary_result])

summary_df = pd.DataFrame(summaries, columns=['bug.id', 'summary'])

Processing index 18027...
Generated summary for index 18027: The "Install theme" button in the Preferences menu of Firefox requires a valid URL, causing it to freeze and crash if an invalid URL is entered. 
    
## Solution
The solution to this issue would be to ensure that any URLs entered into the 'Install themes' field are valid and correctly formatted. This will prevent Firefox from freezing or crashing when an incorrect URL format is used. The URL should be in a format that begins with 'http://' or 'https

Processing index 26540...
Generated summary for index 26540: The DevEdition theme disables the ability to interact with the Device Pixel Ratio (DPR) drop-down menu in the responsive design mode. This issue has been present since at least the release of Firefox Devedition on November 9th, and has not been resolved since then. It appears to be a bug that affects all versions of Windows, Ubuntu, MacOS, as well as the latest nightly and developer editions. The issue affects the fadi

In [32]:
summary_df.to_csv("deepseek-br-ft-eval-sumllama-wih code.csv", index=False)