In [None]:
# Result records for the zero-shot run; the driver loop appends one dict per bug.
summaries = []


def _generate_continuation(prompt, max_length=None):
    """Run the model on *prompt* and return only the newly generated text.

    Args:
        prompt: Full prompt text to feed to the tokenizer.
        max_length: Optional token limit forwarded to the tokenizer. ``None``
            leaves the tokenizer's own default in effect, as when the
            argument is omitted.

    Returns:
        The decoded continuation with special tokens removed and surrounding
        whitespace stripped.
    """
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=max_length,
    ).to(model.device)
    input_ids = inputs["input_ids"]

    with torch.no_grad():
        output = model.generate(
            input_ids,
            # Forward the tokenizer's mask instead of letting generate() guess it.
            attention_mask=inputs.get("attention_mask"),
            pad_token_id=model.config.pad_token_id,
            max_new_tokens=100,
            num_beams=3,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2,
        )

    # Cut the prompt off at token level. Slicing the decoded string by
    # len(prompt) is unreliable: the tokenizer may have truncated the prompt,
    # and decoding is not guaranteed to reproduce it character-for-character.
    # NOTE(review): assumes a decoder-only model whose generate() output starts
    # with the input tokens (the previous prompt-length slicing implied the
    # same) - confirm.
    new_tokens = output[0][input_ids.shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()


def generate_final_summary(bug_report, patch_code):
    """Summarise a bug in two steps: patch summary first, then final summary.

    Step 1 asks the model for a 1-2 sentence summary of ``patch_code``.
    Step 2 feeds the bug report plus that patch summary back to the model and
    asks for a one-sentence summary of the core issue.

    Args:
        bug_report: Bug report text inserted into the second prompt.
        patch_code: Patch source inserted into the first prompt.

    Returns:
        A ``(final_summary, patch_code_summary)`` tuple of stripped strings.
    """
    prompt = f"""You are a senior software engineer helping to analyze this patch code. Summarize this piece of patch code in 1-2 sentences:

Patch Code:
{patch_code}

Summary:"""

    # NOTE(review): truncation at 2048 tokens drops the tail of an over-long
    # prompt, which includes the trailing "Summary:" cue - confirm this is
    # acceptable for large patches.
    patch_code_summary = _generate_continuation(prompt, max_length=2048)

    final_prompt = f"""
Now, Given a Bug Report with Patch Code Summary, Write a one-sentence summary of the core issue using no more than 10 words.\n

Bug Report:
{bug_report}

Patch Code Summary:
{patch_code_summary}

Final Summary:"""

    final_summary = _generate_continuation(final_prompt)
    return final_summary, patch_code_summary



# Zero-shot run: summarise every row of filtered_df and keep one record per bug.
for idx, row in filtered_df.iterrows():
    bug_id = row['bug.id']
    # Missing cells are replaced by empty strings; everything else is stringified.
    bug_report = "" if pd.isna(row['bug_report']) else str(row['bug_report'])
    patch_code = "" if pd.isna(row['patch_code']) else str(row['patch_code'])

    print(f"Processing Bug ID: {bug_id}")

    try:
        if not patch_code.strip():
            # Nothing to summarise: store a placeholder instead of calling the model.
            final_summary, patch_code_summary = "No patch code provided.", ""
        else:
            final_summary, patch_code_summary = generate_final_summary(bug_report, patch_code)
            print(final_summary)
    except Exception as e:
        # Best effort: a failure on one bug is recorded in its row and the run continues.
        final_summary, patch_code_summary = f"Error: {e!s}", ""

    summaries.append(
        {"bug.id": bug_id, "summary": final_summary, "patch_code_summary": patch_code_summary}
    )

br_patch_code_zero_df = pd.DataFrame(summaries)


Processing Bug ID: 66
The issue has been resolved by this patch.
Processing Bug ID: 67
The issue was caused by a bug in the type checker where it did not properly handle the case where a type was an unknown type and it was used as a base type for another type, which caused a NPE. This was fixed by adding a check to make sure that the prototype is not null.
Processing Bug ID: 68
The issue is fixed by adding a method that can determine if the scope is flow-sensitive.
Processing Bug ID: 69
Inheritance Cycles Cause Stack Overflow.
Processing Bug ID: 70
"Compiler ignoring 'deletes' in object literals."
Processing Bug ID: 71
This patch improves the type checker by removing two unnecessary checks.
Processing Bug ID: 72
The patch fixes a bug in Rhino's type system.
Processing Bug ID: 73
A variable can be named in two ways: (1) as a function parameter, or (2) by being assigned a value. In the first case (i.e. function parameters), we do not want to collapse it because it is being used as an arg

In [None]:
def summarize_patch(patch_code):
    """Ask the model for a 1-2 sentence summary of a patch.

    Args:
        patch_code: Patch source inserted into the prompt.

    Returns:
        The generated summary with special tokens removed and surrounding
        whitespace stripped.
    """
    prompt = f"""You are a senior software engineer helping to analyze this patch code. Summarize this piece of patch code in 1-2 sentences:

Patch Code:
{patch_code}

Summary:"""

    # NOTE(review): truncation at 2048 tokens drops the tail of an over-long
    # prompt, which includes the trailing "Summary:" cue - confirm this is
    # acceptable for large patches.
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True, max_length=2048).to(model.device)
    input_ids = inputs['input_ids']

    with torch.no_grad():
        output = model.generate(
            input_ids,
            # Forward the tokenizer's mask instead of letting generate() guess it.
            attention_mask=inputs.get('attention_mask'),
            pad_token_id=model.config.pad_token_id,
            max_new_tokens=100,
            num_beams=3,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    # Cut the prompt off at token level. Slicing the decoded string by
    # len(prompt) is unreliable: the tokenizer may have truncated the prompt,
    # and decoding is not guaranteed to reproduce it character-for-character.
    # NOTE(review): assumes a decoder-only model whose generate() output starts
    # with the input tokens (the previous prompt-length slicing implied the
    # same) - confirm.
    new_tokens = output[0][input_ids.shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

# Use the sixth row (position 5) of filtered_df as the single worked example
# for the one-shot prompt.
# NOTE(review): this row is also part of filtered_df, so any loop over the whole
# frame scores the example against itself - confirm that is intended.
example_bug_report = filtered_df['bug_report'].iloc[5]
example_summary = filtered_df['ground_truth_summary'].iloc[5]
patch_code = filtered_df['patch_code'].iloc[5]

# The example's patch summary is model-generated, not taken from the data.
example_patch_code_summary = summarize_patch(patch_code)
print(example_patch_code_summary)


This patch removes two lines of code that check if the left and right types are constructors or enums, and if they are, it registers a mismatch. The patch also removes an else statement that does nothing.


In [None]:
# Result records for the one-shot run; the driver loop appends one dict per bug.
summaries = []


def generate_final_summary(bug_report, patch_code):
    """Summarise a bug in two steps, guided by one worked example.

    Step 1 summarises ``patch_code`` via ``summarize_patch``. Step 2 builds a
    prompt from the module-level example (``example_bug_report``,
    ``example_patch_code_summary``, ``example_summary``) plus the new bug
    report and patch summary, and asks for a one-sentence summary.

    Args:
        bug_report: Bug report text inserted into the second prompt.
        patch_code: Patch source to summarise in the first step.

    Returns:
        A ``(final_summary, patch_code_summary)`` tuple of stripped strings.
    """
    # Step 1 is the same prompt and generation settings that summarize_patch
    # uses, so delegate instead of keeping a second copy.
    patch_code_summary = summarize_patch(patch_code)

    final_prompt = f"""Here is an example of a bug report, summarized patch code and its summary:
    Example Bug Report:
    {example_bug_report}

    Example Patch Code Summary:
    {example_patch_code_summary}

    Example Summary:
    {example_summary}

 Now, Given a Bug Report with Patch Code Summary, Write a one-sentence summary of the core issue using no more than 10 words. Avoid copying example text unless they naturally apply ; tailor the summary to the new bug report.\n

Bug Report:
{bug_report}

Patch Code Summary:
{patch_code_summary}

Final Summary:"""

    inputs = tokenizer(final_prompt, return_tensors="pt", truncation=True, padding=True).to(model.device)
    input_ids = inputs['input_ids']

    with torch.no_grad():
        output = model.generate(
            input_ids,
            # Forward the tokenizer's mask instead of letting generate() guess it.
            attention_mask=inputs.get('attention_mask'),
            pad_token_id=model.config.pad_token_id,
            max_new_tokens=100,
            num_beams=3,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    # Cut the prompt off at token level. Slicing the decoded string by
    # len(final_prompt) is unreliable: the tokenizer may truncate the prompt,
    # and decoding is not guaranteed to reproduce it character-for-character.
    # NOTE(review): assumes a decoder-only model whose generate() output starts
    # with the input tokens (the previous prompt-length slicing implied the
    # same) - confirm.
    new_tokens = output[0][input_ids.shape[1]:]
    final_summary = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
    return final_summary, patch_code_summary



# One-shot run: summarise every row of filtered_df and keep one record per bug.
for idx, row in filtered_df.iterrows():
    bug_id = row['bug.id']
    # Missing cells are replaced by empty strings; everything else is stringified.
    bug_report = "" if pd.isna(row['bug_report']) else str(row['bug_report'])
    patch_code = "" if pd.isna(row['patch_code']) else str(row['patch_code'])

    print(f"Processing Bug ID: {bug_id}")

    try:
        if not patch_code.strip():
            # Nothing to summarise: store a placeholder instead of calling the model.
            final_summary, patch_code_summary = "No patch code provided.", ""
        else:
            final_summary, patch_code_summary = generate_final_summary(bug_report, patch_code)
            print(final_summary)
    except Exception as e:
        # Best effort: a failure on one bug is recorded in its row and the run continues.
        final_summary, patch_code_summary = f"Error: {e!s}", ""

    summaries.append(
        {"bug.id": bug_id, "summary": final_summary, "patch_code_summary": patch_code_summary}
    )

br_patch_code_one_df = pd.DataFrame(summaries)


Processing Bug ID: 66
The issue is now fixed in Closure Compiler.
Processing Bug ID: 67
Adding a check to see if an implicit interface prototype is not null before setting its property name set to empty.
Processing Bug ID: 68
A variable that is declared in a catch block cannot be optimized because it is referenced outside of its scope.
Processing Bug ID: 69
Inheritance Cycle Detection
Processing Bug ID: 70
The compiler is ignoring the delete statement when rewriting object literals, causing functionality to break.
Processing Bug ID: 71
Better 'This' Type Check
Processing Bug ID: 72
A patch was made to fix the issue, but it is not clear what the patch does.
Processing Bug ID: 73
A patch was added to prevent collapsing of named parameters in functions.
Processing Bug ID: 74
The issue is that the compiler is not able to resolve the dependency of Bar on Foo, even though the files are in adjacent directories. This is due to a problem with the guessModuleName function, where it is unable to 

In [None]:
# Draw three rows (fixed seed, so the pick is reproducible) to serve as
# few-shot demonstrations.
# NOTE(review): the examples come from patch_merge_df itself, so a loop over
# the whole frame also scores these three rows - confirm that is intended.
few_shot_examples = patch_merge_df.sample(3, random_state=42)

separator = "=" * 10
example_blocks = ["Here are some examples of bug reports, patch code summaries and their summaries:\n\n"]

# One numbered section per sampled row, each terminated by a separator line.
for i, (_, row) in enumerate(few_shot_examples.iterrows(), 1):
    bug_report = row['bug_report']
    ground_truth = row['ground_truth_summary']
    patch_code_summary = row['patch_code_summary']

    example_blocks.append(
        f"Example {i}:\n"
        f"Bug Report:\n{bug_report}\n\n"
        f"Patch Code Summary:\n{patch_code_summary}\n\n"
        f"Summary:\n{ground_truth}\n\n"
        f"{separator}\n\n"
    )

# Join once and trim the trailing blank lines left after the last separator.
example_prompt = "".join(example_blocks).strip()
print(example_prompt)


Here are some examples of bug reports, patch code summaries and their summaries:

Example 1:
Bug Report:
Bug Report ID: 569
Status: Fixed
Summary: Converts string properties into numbers in literal object definitions
Labels: Type-Defect, Priority-Medium
Stars: 0
Comment Count: 5
Comments:

0. **Comment by User (ID: 2969747056394285933)**
   - **Timestamp**: 1317668182
   - **Content**: <b>What steps will reproduce the problem?</b>
1. Minimize the following script:

var lit = {&quot;0102&quot;:&quot;Zero One Zero Two&quot;};
alert(lit[&quot;0102&quot;]);

<b>What is the expected output? What do you see instead?</b>

Expected:
var lit={&quot;0102&quot;:&quot;Zero One Zero Two&quot;};alert(lit[&quot;0102&quot;]);

Actual:
var lit={102:&quot;Zero One Zero Two&quot;};alert(lit[&quot;0102&quot;]);

<b>What version of the product are you using? On what operating system?</b>

r1459

<b>Please provide any additional information below.</b>


1. **Comment by User (ID: 2969747056394285933)**
   - 

In [None]:
# Result records for the few-shot run; the driver loop appends one dict per bug.
summaries = []


def generate_final_summary(bug_report, patch_code_summary):
    """Produce a one-sentence bug summary using the few-shot example prompt.

    The prompt is the module-level ``example_prompt`` followed by the
    instruction, the new bug report and its precomputed patch summary.

    Args:
        bug_report: Bug report text inserted into the prompt.
        patch_code_summary: Existing patch summary inserted into the prompt.

    Returns:
        A ``(final_summary, patch_code_summary)`` tuple: the stripped
        generated summary and the patch summary passed in, unchanged.
    """

    final_prompt = f"""{example_prompt}
 Now, Given a Bug Report with Patch Code Summary, Write a one-sentence summary of the core issue using no more than 10 words. Avoid copying example text unless they naturally apply ; tailor the summary to the new bug report.\n

Bug Report:
{bug_report}

Patch Code Summary:
{patch_code_summary}

Final Summary:"""

    inputs = tokenizer(final_prompt, return_tensors="pt", truncation=True, padding=True).to(model.device)
    input_ids = inputs['input_ids']

    with torch.no_grad():
        output = model.generate(
            input_ids,
            # Forward the tokenizer's mask instead of letting generate() guess it.
            attention_mask=inputs.get('attention_mask'),
            pad_token_id=model.config.pad_token_id,
            max_new_tokens=100,
            num_beams=3,
            top_k=50,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    # Cut the prompt off at token level. Slicing the decoded string by
    # len(final_prompt) is unreliable: the tokenizer may truncate the prompt,
    # and decoding is not guaranteed to reproduce it character-for-character.
    # NOTE(review): assumes a decoder-only model whose generate() output starts
    # with the input tokens (the previous prompt-length slicing implied the
    # same) - confirm.
    new_tokens = output[0][input_ids.shape[1]:]
    final_summary = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
    return final_summary, patch_code_summary


# Few-shot run: summarise every row of patch_merge_df from its stored patch
# summary and keep one record per bug.
for idx, row in patch_merge_df.iterrows():
    bug_id = row['bug.id']
    # Missing cells are replaced by empty strings; everything else is stringified.
    bug_report = "" if pd.isna(row['bug_report']) else str(row['bug_report'])
    patch_code_summary = "" if pd.isna(row['patch_code_summary']) else str(row['patch_code_summary'])

    print(f"Processing Bug ID: {bug_id}")

    try:
        if not patch_code_summary.strip():
            # No patch summary to work from: store a placeholder instead of calling the model.
            final_summary, patch_code_summary = "No patch code summary provided.", ""
        else:
            final_summary, patch_code_summary = generate_final_summary(bug_report, patch_code_summary)
            print(final_summary)
    except Exception as e:
        # Best effort: a failure on one bug is recorded in its row and the run continues.
        final_summary, patch_code_summary = f"Error: {e!s}", ""

    summaries.append(
        {"bug.id": bug_id, "summary": final_summary, "patch_code_summary": patch_code_summary}
    )

br_patch_code_few_df = pd.DataFrame(summaries)


Processing Bug ID: 66
The issue is that the Closure compiler is removing arguments from functions even if they are used, even when the compilation level is simple optimizations. This patch prevents this from happening.
Processing Bug ID: 67
Combining @interfaces with multiple extends can cause the compiler to crash when the extends are of unknown types.
Processing Bug ID: 68
A variable cannot be inline if its scope is larger than the function it is defined in.
Processing Bug ID: 69
Inheritance cycles cause infinite recursion when converting from interface types to constructors that implement themselves.
Processing Bug ID: 70
The compiler is ignoring delete statement when rewriting internal object variable references.
Processing Bug ID: 71
Better 'This' Type Checking
Processing Bug ID: 72
The issue is that the type checker is not able to infer the correct type of an object when it is passed as an argument to a method. This is caused by a combination of factors, including the use of gene