# RQ1: Can LLMs detect bug-fix-related diffs from tangled commits?

In [None]:
%load_ext autoreload
%autoreload 2

import os
import numpy as np
import pandas as pd

from sklearn.metrics import classification_report, ConfusionMatrixDisplay, confusion_matrix
from modules.Untangler import GeminiUntangler, OpenAIUntangler, FreeUntangler

In [None]:
# Load the complete gold set; every batch_detect call below takes this frame
# (or a slice of it) as input.
df = pd.read_csv(filepath_or_buffer="./data/Complete_GoldSet.csv")

In [None]:
# Make sure the output directory for the per-model CSV files exists;
# exist_ok=True keeps re-running this cell harmless.
os.makedirs(name="./Results/RQ1", exist_ok=True)

# Without Commit Message

## gpt-4o-mini

In [None]:
# Run gpt-4o-mini over the whole gold set and persist its predictions.
# NOTE(review): the second constructor argument is False in this section and
# True under "With Commit Message" — presumably it toggles the commit message.
model_name = "gpt-4o-mini"
untangler = OpenAIUntangler(model_name, False, 0, False)
result = untangler.batch_detect(df, iteratively=False)
result.to_csv(path_or_buf=f"./Results/RQ1/{model_name}.csv", index=False)

In [None]:
# Reload the saved predictions for the current model_name and score them:
# "Decision" is passed as the ground truth, "Detection" as the prediction.
result = pd.read_csv(filepath_or_buffer=f"./Results/RQ1/{model_name}.csv")
print(
    classification_report(
        y_true=result["Decision"],
        y_pred=result["Detection"],
    )
)
# Pin the class order so the matrix axes always read Buggy, NotBuggy.
cm = confusion_matrix(
    y_true=result["Decision"],
    y_pred=result["Detection"],
    labels=["Buggy", "NotBuggy"],
)
fig = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=["Buggy", "NotBuggy"],
)
fig.plot()

In [None]:
# Repeat the evaluation on the subset of rows whose CommitHash also appears in
# ./data/GoldSet.csv.
g = pd.read_csv(filepath_or_buffer="./data/GoldSet.csv")
filtered_result = result.loc[result["CommitHash"].isin(g["CommitHash"])]
print(
    classification_report(
        y_true=filtered_result["Decision"],
        y_pred=filtered_result["Detection"],
    )
)
# Pin the class order so the matrix axes always read Buggy, NotBuggy.
cm = confusion_matrix(
    y_true=filtered_result["Decision"],
    y_pred=filtered_result["Detection"],
    labels=["Buggy", "NotBuggy"],
)
fig = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=["Buggy", "NotBuggy"],
)
fig.plot()

## gpt-4o

In [None]:
# Run gpt-4o over the whole gold set and persist its predictions.
# NOTE(review): the second constructor argument is False in this section and
# True under "With Commit Message" — presumably it toggles the commit message.
model_name = "gpt-4o"
untangler = OpenAIUntangler(model_name, False, 0, False)
result = untangler.batch_detect(df, iteratively=False)
result.to_csv(path_or_buf=f"./Results/RQ1/{model_name}.csv", index=False)

In [None]:
# Reload the saved predictions for the current model_name and print the
# per-class report ("Decision" = ground truth, "Detection" = prediction).
result = pd.read_csv(filepath_or_buffer=f"./Results/RQ1/{model_name}.csv")
print(
    classification_report(
        y_true=result["Decision"],
        y_pred=result["Detection"],
    )
)

In [None]:
# Repeat the evaluation on the subset of rows whose CommitHash also appears in
# ./data/GoldSet.csv.
g = pd.read_csv(filepath_or_buffer="./data/GoldSet.csv")
filtered_result = result.loc[result["CommitHash"].isin(g["CommitHash"])]
print(
    classification_report(
        y_true=filtered_result["Decision"],
        y_pred=filtered_result["Detection"],
    )
)
# Pin the class order so the matrix axes always read Buggy, NotBuggy.
cm = confusion_matrix(
    y_true=filtered_result["Decision"],
    y_pred=filtered_result["Detection"],
    labels=["Buggy", "NotBuggy"],
)
fig = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=["Buggy", "NotBuggy"],
)
fig.plot()

## gemini-2.0-flash

In [None]:
# Run gemini-2.0-flash over the whole gold set and persist its predictions.
# NOTE(review): the second constructor argument is False in this section and
# True under "With Commit Message" — presumably it toggles the commit message.
model_name = "gemini-2.0-flash"
untangler = GeminiUntangler(model_name, False, 0, False)
result = untangler.batch_detect(df)
# Trim surrounding whitespace from each Detection value before saving.
result["Detection"] = result["Detection"].apply(lambda detection: detection.strip())
result.to_csv(path_or_buf=f"./Results/RQ1/{model_name}.csv", index=False)

In [None]:
# Reload the saved predictions for the current model_name and print the
# per-class report ("Decision" = ground truth, "Detection" = prediction).
result = pd.read_csv(filepath_or_buffer=f"./Results/RQ1/{model_name}.csv")
print(
    classification_report(
        y_true=result["Decision"],
        y_pred=result["Detection"],
    )
)

In [None]:
# Repeat the evaluation on the subset of rows whose CommitHash also appears in
# ./data/GoldSet.csv.
g = pd.read_csv(filepath_or_buffer="./data/GoldSet.csv")
filtered_result = result.loc[result["CommitHash"].isin(g["CommitHash"])]
print(
    classification_report(
        y_true=filtered_result["Decision"],
        y_pred=filtered_result["Detection"],
    )
)
# Pin the class order so the matrix axes always read Buggy, NotBuggy.
cm = confusion_matrix(
    y_true=filtered_result["Decision"],
    y_pred=filtered_result["Detection"],
    labels=["Buggy", "NotBuggy"],
)
fig = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=["Buggy", "NotBuggy"],
)
fig.plot()

## microsoft/Phi-3-mini-4k-instruct

In [None]:
# Run FreeUntangler with the Phi-3 model and persist its predictions.
# NOTE(review): only df[:2] (the first two rows) is processed here, whereas the
# other models receive the full df — confirm this is intentional.
model_name = "microsoft/Phi-3-mini-4k-instruct"
untangler = FreeUntangler(model_name, False, 0, False)
result = untangler.batch_detect(df[:2])
# Trim surrounding whitespace from each Detection value before saving.
result["Detection"] = result["Detection"].apply(lambda detection: detection.strip())

# The "/" in the model id would act as a path separator in the file name below,
# so replace it with "-" first.
model_name = model_name.replace("/", "-")
result.to_csv(path_or_buf=f"./Results/RQ1/{model_name}.csv", index=False)

In [None]:
# Reload the saved predictions for the current model_name and print the
# per-class report ("Decision" = ground truth, "Detection" = prediction).
result = pd.read_csv(filepath_or_buffer=f"./Results/RQ1/{model_name}.csv")
print(
    classification_report(
        y_true=result["Decision"],
        y_pred=result["Detection"],
    )
)

In [None]:
# Repeat the evaluation on the subset of rows whose CommitHash also appears in
# ./data/GoldSet.csv.
g = pd.read_csv(filepath_or_buffer="./data/GoldSet.csv")
filtered_result = result.loc[result["CommitHash"].isin(g["CommitHash"])]
print(
    classification_report(
        y_true=filtered_result["Decision"],
        y_pred=filtered_result["Detection"],
    )
)
# Pin the class order so the matrix axes always read Buggy, NotBuggy.
cm = confusion_matrix(
    y_true=filtered_result["Decision"],
    y_pred=filtered_result["Detection"],
    labels=["Buggy", "NotBuggy"],
)
fig = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=["Buggy", "NotBuggy"],
)
fig.plot()

# With Commit Message

## gpt-4o-mini

In [None]:
# Run gpt-4o-mini over the whole gold set for the "With Commit Message" setting.
# NOTE(review): the second constructor argument is True here and False in the
# "Without Commit Message" section — presumably it toggles the commit message.
model_name = "gpt-4o-mini"
untangler = OpenAIUntangler(model_name, True, 0, False)
result = untangler.batch_detect(df, iteratively=False)

# Save under a distinct file name: the "Without Commit Message" run already
# writes ./Results/RQ1/gpt-4o-mini.csv, and reusing that path would overwrite
# its results. model_name is rebound (not a new variable) so that the
# evaluation cells, which read f"./Results/RQ1/{model_name}.csv", load this
# run's file.
model_name = f"{model_name}_with_message"
result.to_csv(f"./Results/RQ1/{model_name}.csv", index=False)

In [None]:
# Reload the saved predictions for the current model_name and score them:
# "Decision" is passed as the ground truth, "Detection" as the prediction.
result = pd.read_csv(filepath_or_buffer=f"./Results/RQ1/{model_name}.csv")
print(
    classification_report(
        y_true=result["Decision"],
        y_pred=result["Detection"],
    )
)
# Pin the class order so the matrix axes always read Buggy, NotBuggy.
cm = confusion_matrix(
    y_true=result["Decision"],
    y_pred=result["Detection"],
    labels=["Buggy", "NotBuggy"],
)
fig = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=["Buggy", "NotBuggy"],
)
fig.plot()

In [None]:
# Repeat the evaluation on the subset of rows whose CommitHash also appears in
# ./data/GoldSet.csv.
g = pd.read_csv(filepath_or_buffer="./data/GoldSet.csv")
filtered_result = result.loc[result["CommitHash"].isin(g["CommitHash"])]
print(
    classification_report(
        y_true=filtered_result["Decision"],
        y_pred=filtered_result["Detection"],
    )
)
# Pin the class order so the matrix axes always read Buggy, NotBuggy.
cm = confusion_matrix(
    y_true=filtered_result["Decision"],
    y_pred=filtered_result["Detection"],
    labels=["Buggy", "NotBuggy"],
)
fig = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=["Buggy", "NotBuggy"],
)
fig.plot()

## gpt-4o

In [None]:
# Run gpt-4o over the whole gold set for the "With Commit Message" setting.
# NOTE(review): the second constructor argument is True here and False in the
# "Without Commit Message" section — presumably it toggles the commit message.
model_name = "gpt-4o"
untangler = OpenAIUntangler(model_name, True, 0, False)
result = untangler.batch_detect(df, iteratively=False)

# Save under a distinct file name: the "Without Commit Message" run already
# writes ./Results/RQ1/gpt-4o.csv, and reusing that path would overwrite its
# results. model_name is rebound (not a new variable) so that the evaluation
# cells, which read f"./Results/RQ1/{model_name}.csv", load this run's file.
model_name = f"{model_name}_with_message"
result.to_csv(f"./Results/RQ1/{model_name}.csv", index=False)

In [None]:
# Reload the saved predictions for the current model_name and print the
# per-class report ("Decision" = ground truth, "Detection" = prediction).
result = pd.read_csv(filepath_or_buffer=f"./Results/RQ1/{model_name}.csv")
print(
    classification_report(
        y_true=result["Decision"],
        y_pred=result["Detection"],
    )
)

In [None]:
# Repeat the evaluation on the subset of rows whose CommitHash also appears in
# ./data/GoldSet.csv.
g = pd.read_csv(filepath_or_buffer="./data/GoldSet.csv")
filtered_result = result.loc[result["CommitHash"].isin(g["CommitHash"])]
print(
    classification_report(
        y_true=filtered_result["Decision"],
        y_pred=filtered_result["Detection"],
    )
)
# Pin the class order so the matrix axes always read Buggy, NotBuggy.
cm = confusion_matrix(
    y_true=filtered_result["Decision"],
    y_pred=filtered_result["Detection"],
    labels=["Buggy", "NotBuggy"],
)
fig = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=["Buggy", "NotBuggy"],
)
fig.plot()

## gemini-2.0-flash

In [None]:
# Run gemini-2.0-flash over the whole gold set for the "With Commit Message"
# setting.
# NOTE(review): the second constructor argument is True here and False in the
# "Without Commit Message" section — presumably it toggles the commit message.
model_name = "gemini-2.0-flash"
untangler = GeminiUntangler(model_name, True, 0, False)
result = untangler.batch_detect(df)
# Trim surrounding whitespace from each Detection value before saving.
result["Detection"] = result["Detection"].apply(lambda x: x.strip())

# Save under a distinct file name: the "Without Commit Message" run already
# writes ./Results/RQ1/gemini-2.0-flash.csv, and reusing that path would
# overwrite its results. model_name is rebound (not a new variable) so that the
# evaluation cells, which read f"./Results/RQ1/{model_name}.csv", load this
# run's file.
model_name = f"{model_name}_with_message"
result.to_csv(f"./Results/RQ1/{model_name}.csv", index=False)

In [None]:
# Reload the saved predictions for the current model_name and print the
# per-class report ("Decision" = ground truth, "Detection" = prediction).
result = pd.read_csv(filepath_or_buffer=f"./Results/RQ1/{model_name}.csv")
print(
    classification_report(
        y_true=result["Decision"],
        y_pred=result["Detection"],
    )
)

In [None]:
# Repeat the evaluation on the subset of rows whose CommitHash also appears in
# ./data/GoldSet.csv.
g = pd.read_csv(filepath_or_buffer="./data/GoldSet.csv")
filtered_result = result.loc[result["CommitHash"].isin(g["CommitHash"])]
print(
    classification_report(
        y_true=filtered_result["Decision"],
        y_pred=filtered_result["Detection"],
    )
)
# Pin the class order so the matrix axes always read Buggy, NotBuggy.
cm = confusion_matrix(
    y_true=filtered_result["Decision"],
    y_pred=filtered_result["Detection"],
    labels=["Buggy", "NotBuggy"],
)
fig = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=["Buggy", "NotBuggy"],
)
fig.plot()

## microsoft/Phi-3-mini-4k-instruct

In [None]:
# Run FreeUntangler with the Phi-3 model for the "With Commit Message" setting.
# NOTE(review): the second constructor argument is True here and False in the
# "Without Commit Message" section — presumably it toggles the commit message.
# NOTE(review): only df[:2] (the first two rows) is processed here, whereas the
# other models receive the full df — confirm this is intentional.
model_name = "microsoft/Phi-3-mini-4k-instruct"
untangler = FreeUntangler(model_name, True, 0, False)
result = untangler.batch_detect(df[:2])
# Trim surrounding whitespace from each Detection value before saving.
result["Detection"] = result["Detection"].apply(lambda x: x.strip())

# The "/" in the model id would act as a path separator in the file name, so
# replace it with "-". The "_with_message" suffix keeps this run from
# overwriting ./Results/RQ1/microsoft-Phi-3-mini-4k-instruct.csv, which the
# "Without Commit Message" run writes. model_name is rebound so that the
# evaluation cells, which read f"./Results/RQ1/{model_name}.csv", load this
# run's file.
model_name = model_name.replace("/", "-") + "_with_message"
result.to_csv(f"./Results/RQ1/{model_name}.csv", index=False)

In [None]:
# Reload the saved predictions for the current model_name and print the
# per-class report ("Decision" = ground truth, "Detection" = prediction).
result = pd.read_csv(filepath_or_buffer=f"./Results/RQ1/{model_name}.csv")
print(
    classification_report(
        y_true=result["Decision"],
        y_pred=result["Detection"],
    )
)

In [None]:
# Repeat the evaluation on the subset of rows whose CommitHash also appears in
# ./data/GoldSet.csv.
g = pd.read_csv(filepath_or_buffer="./data/GoldSet.csv")
filtered_result = result.loc[result["CommitHash"].isin(g["CommitHash"])]
print(
    classification_report(
        y_true=filtered_result["Decision"],
        y_pred=filtered_result["Detection"],
    )
)
# Pin the class order so the matrix axes always read Buggy, NotBuggy.
cm = confusion_matrix(
    y_true=filtered_result["Decision"],
    y_pred=filtered_result["Detection"],
    labels=["Buggy", "NotBuggy"],
)
fig = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=["Buggy", "NotBuggy"],
)
fig.plot()