# GPT-o1 for JIT-SDP

In [None]:
# Read the OpenAI API key without echoing it: `input()` would leave the secret
# visible in the cell output that gets saved with the notebook.
from getpass import getpass

key = getpass('Key:')

In [2]:
from openai import OpenAI
# OpenAI API client authenticated with the value held in `key`; used by the
# prediction loop to send chat-completion requests.
client = OpenAI(api_key=key)

In [3]:
import pickle

In [6]:
# Location of the pickled feature table `features_train` (presumably the
# training split, judging by the file name).
pickle_file_path = 'data/features_train.pkl'

In [7]:
# Load the pickled object at `pickle_file_path` into `features_data`.
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only open files that come from a trusted source.
with open(pickle_file_path, 'rb') as file:
    features_data = pickle.load(file)

### Comment + code + expert features

In [13]:
# System prompt sent with every request. It lists the commit fields that the
# user message supplies and asks the model to answer with a JSON object holding
# a "prediction" ("YES"/"NO") and a "reason". The field names must stay in sync
# with the user prompt assembled in the prediction loop.
system_message = """You are a Just-In-Time Software Defect Prediction (JIT-SDP) system responsible for predicting 
    software defects at the time they are about to occur.  To do that, you will receive a commit 
    information. The commit information will have the following format:
    
    COMMIT MESSAGE: <a message describing the intent of the commit>
    ADDED LINES: {<a set of added lines>}
    REMOVED LINES: {<a set of removed lines>}
    NUMBER OF MODIFIED SUBSYSTEMS: <number of modified subsystems>
    NUMBER OF MODIFIED DIRECTORIES: <number of modified directories>
    NUMBER OF MODIFIED FILES: <number of modified files>
    ENTROPY (DISTRIBUTION OF MODIFIED CODE ACROSS EACH FILE): <distribution of modified code across each file>
    NUMBER OF LINES OF CODE ADDED: <number of lines of code added>
    NUMBER OF LINES OF CODE DELETED: <number of lines of code deleted>
    NUMBER OF LINES OF CODE IN A FILE BEFORE THE CHANGE: <number of lines of code in a file before the change>
    FIX (True if the change is a defect fix, False otherwise>): <True if the change is a defect fix, False otherwise>
    NUMBER OF DEVELOPERS THAT CHANGED THE MODIFIED FILES: <the number of developers that changed the modified files>
    AVERAGE TIME BETWEEN THE LAST AND THE CURRENT CHANGE: <the average time between the last and the current change>
    NUMBER OF UNIQUE CHANGES TO THE MODIFIED FILES: <the number of unique changes to the modified files>
    DEVELOPER EXPERIENCE: <developer experience>
    RECENT DEVELOPER EXPERIENCE: <recent developer experience>
    DEVELOPER EXPERIENCE ON THE CURRENT SUBSYSTEM: <developer experience in a subsystem>
    
    You will use the commit message, added/removed lines, and extra information to generate your answer.  You will answer with 
    "YES" or "NO" if the commit may introduce a bug and the reason why you reached your conclusion.  
    Generate your response in the following JSON format: 
    {
        "prediction": <YES|NO>,
        "reason": <reason>
    }
    """

In [15]:
# Reference means for the three developer-experience columns. They are taken
# from whatever table `features_data` holds when this cell runs (at this point
# in the notebook: the one loaded from 'data/features_train.pkl') and are later
# used by get_exp_desc to label a value as below/above average.
exp_mean = features_data['exp'].astype(float).mean()
rexp_mean = features_data['rexp'].astype(float).mean()
sexp_mean = features_data['sexp'].astype(float).mean()

In [16]:
def get_exp_desc(mean, value):
    """Describe an experience value relative to a reference mean.

    Args:
        mean: Reference mean (numeric).
        value: Value to classify; anything accepted by ``float()``.

    Returns:
        'average' when the integer parts of ``value`` and ``mean`` coincide,
        otherwise 'below average' or 'above average' depending on which side
        of ``mean`` the value lies. 'average' is also the fallback when
        neither comparison holds.
    """
    numeric = float(value)

    # Values sharing the mean's integer part count as average.
    if int(numeric) == int(mean):
        return 'average'
    if numeric < mean:
        return 'below average'
    if numeric > mean:
        return 'above average'
    return 'average'


In [17]:
exp_mean, rexp_mean, sexp_mean

(206.33247831928668, 106.32190481395303, 194.21912657697746)

In [24]:
# Location of the pickled change data `changes_test_lst`.
pickle_file_path = 'jitfine/changes_test_lst.pkl'

In [25]:
# Load the change data. Later cells index it positionally:
# [0] commit ids, [1] labels, [2] commit messages, [3] added/removed code.
# NOTE(review): pickle.load is only safe on trusted files.
with open(pickle_file_path, 'rb') as file:
    semantic_data = pickle.load(file)

In [26]:
# Location of the pickled feature table `features_test`.
pickle_file_path = 'jitfine/features_test.pkl'

In [27]:
# Rebinds `features_data`: from here on it holds the table read from
# `pickle_file_path` instead of the one loaded earlier from
# 'data/features_train.pkl'. The exp/rexp/sexp means were computed before this
# rebinding and are not recomputed.
# NOTE(review): pickle.load is only safe on trusted files.
with open(pickle_file_path, 'rb') as file:
    features_data = pickle.load(file)

In [28]:
len(semantic_data[0])

5480

In [29]:
len(features_data)

5480

In [1]:
from sklearn.metrics import roc_auc_score, accuracy_score, precision_score, recall_score, f1_score

def calculate_metrics(y_true, y_pred):
    """Score predictions against the true labels.

    Args:
        y_true: True labels.
        y_pred: Predicted labels, aligned with ``y_true``.

    Returns:
        Tuple ``(auc, accuracy, precision, recall, f1)`` as computed by the
        corresponding ``sklearn.metrics`` functions with default arguments.
    """
    scorers = (
        roc_auc_score,
        accuracy_score,
        precision_score,
        recall_score,
        f1_score,
    )
    return tuple(scorer(y_true, y_pred) for scorer in scorers)

In [33]:
# Start from empty containers: raw model answers and their predictions.
results = []
preds = []

In [34]:
len(results)

0

In [35]:
# Pull the "prediction" field out of every stored result.
preds = [r['prediction'] for r in results]

In [36]:
len(preds)

0

In [37]:
# Encode predictions numerically: "YES" -> 1.0, anything else -> 0.0.
preds = [1.0 if pred == 'YES' else 0.0 for pred in preds]

## Zero-shot Chain-of-Thought

In [38]:
import json

In [39]:
# Reset both containers to empty.
preds = []
results = []

In [40]:
# Drop the last stored result (a no-op when the list is empty).
results = results[:-1]

In [41]:
len(results)

0

In [42]:
def label_to_str(label):
    """Translate a binary label into the answer string used in predictions.

    Returns "YES" for a label equal to 1, "NO" for a label equal to 0 and
    None for any other value.
    """
    for value, answer in ((1, "YES"), (0, "NO")):
        if label == value:
            return answer
    return None

In [43]:
import pickle

# Reload the results list from 'results_cot_o1_final.pkl', the same file the
# prediction loop writes its periodic checkpoints to.
# NOTE(review): pickle.load is only safe on trusted files.
with open('results_cot_o1_final.pkl', 'rb') as file:
    results = pickle.load(file)

In [None]:
len(results)

In [45]:
# Labels are the second element of the loaded change data; they are the
# y_true passed to the metric computation.
labels = semantic_data[1] 

In [46]:
# Rebuild the prediction strings from the stored results. An entry that is not
# a dict (the prediction loop stores the raw text when the JSON could not be
# parsed) is mapped to the opposite of its true label, i.e. it is deliberately
# counted as a wrong prediction.
preds = [r['prediction'] if isinstance(r, dict) else label_to_str(1 - labels[i]) for i,r in enumerate(results)]

In [47]:
# Encode predictions numerically: "YES" -> 1.0, anything else -> 0.0.
preds = [1.0 if pred == 'YES' else 0.0 for pred in preds]

In [None]:
len(preds)

In [None]:
import json
from tqdm import tqdm
from json import JSONDecodeError

# Positional layout of the loaded change data.
commit_ids = semantic_data[0]
labels = semantic_data[1]
comments = semantic_data[2]
code = semantic_data[3]

# Resume after the predictions that already exist (`preds` may have been
# rebuilt from a checkpoint), then query the model once per remaining commit.
for i in tqdm(range(len(preds), len(features_data))):
    prompt = f"""COMMIT MESSAGE: {comments[i]}"""

    if len(code[i]['added_code']) > 0:
        prompt += "\nADDED LINES: " + str(code[i]['added_code']).replace('\'', '')

    if len(code[i]['removed_code']) > 0:
        # NOTE: unlike "ADDED LINES: " there is no space after the colon; the
        # text is kept as-is so the prompt sent to the model does not change.
        prompt += "\nREMOVED LINES:" + str(code[i]['removed_code']).replace('\'', '')

    # The feature table and the change data must describe the same commit.
    assert features_data.loc[i, 'commit_hash'] == commit_ids[i]

    prompt += f"""\n
    NUMBER OF MODIFIED SUBSYSTEMS: {features_data.loc[i,'ns']}
    NUMBER OF MODIFIED DIRECTORIES: {features_data.loc[i,'nd']}
    NUMBER OF MODIFIED FILES: {features_data.loc[i,'nf']}
    ENTROPY (DISTRIBUTION OF MODIFIED CODE ACROSS EACH FILE): {features_data.loc[i,'entropy']}
    NUMBER OF LINES OF CODE ADDED: {features_data.loc[i,'la']}
    NUMBER OF LINES OF CODE DELETED: {features_data.loc[i,'ld']}
    NUMBER OF LINES OF CODE IN A FILE BEFORE THE CHANGE: {features_data.loc[i,'lt']}
    FIX (True if the change is a defect fix, False otherwise>): {features_data.loc[i,'fix']}
    NUMBER OF DEVELOPERS THAT CHANGED THE MODIFIED FILES: {features_data.loc[i,'ndev']}
    AVERAGE TIME BETWEEN THE LAST AND THE CURRENT CHANGE: {features_data.loc[i,'age']}
    NUMBER OF UNIQUE CHANGES TO THE MODIFIED FILES: {features_data.loc[i,'nuc']}
    DEVELOPER EXPERIENCE: {get_exp_desc(exp_mean, features_data.loc[i,'exp'])}
    RECENT DEVELOPER EXPERIENCE: {get_exp_desc(rexp_mean, features_data.loc[i,'rexp'])}
    DEVELOPER EXPERIENCE ON THE CURRENT SUBSYSTEM: {get_exp_desc(sexp_mean, features_data.loc[i,'sexp'])}
    """

    # Zero-shot chain-of-thought trigger.
    prompt += "\n Let's think step by step. "

    # The user prompt is capped at 200000 characters.
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": prompt[:200000]},
    ]

    result = client.chat.completions.create(model="o1", messages=messages)
    resp = result.choices[0].message.content

    # Keep only the outermost {...} span of the answer; the model may wrap the
    # JSON object in additional reasoning text.
    content = resp[resp.find('{'):resp.rfind('}') + 1]

    try:
        content = json.loads(content.replace('\n', ' '))
    except JSONDecodeError:
        # Unparseable answer: show it, keep the raw text as the result and
        # score it as a wrong prediction (the opposite of the true label).
        print(content)
        pred = 1 - labels[i]
    else:
        pred = 1.0 if content["prediction"] == "YES" else 0.0

    # Append to both lists only after the prediction has been extracted, so an
    # unexpected answer shape (e.g. a missing "prediction" key) cannot leave
    # `results` one element longer than `preds` and break the resume logic.
    results.append(content)
    preds.append(pred)

    # Every 100 commits: report running metrics and checkpoint the results.
    if i > 0 and (i % 100) == 0:
        auc, accuracy, precision, recall, f1 = calculate_metrics(labels[:i+1], preds)
        print(f"auc = {auc}, accuracy = {accuracy}, precision = {precision}, recall = {recall}, f1 = {f1}")
        with open('results_cot_o1_final.pkl', 'wb') as file:
            pickle.dump(results, file)
    

In [50]:
len(results), len(preds)

(5480, 5480)

In [53]:
# Final metrics over every prediction collected so far. Uses calculate_metrics,
# the metric helper defined in this notebook (the previous name
# `calcular_metricas` is not defined anywhere in it).
auc, accuracy, precision, recall, f1 = calculate_metrics(labels[:len(preds)], preds)
print(f"auc = {auc}, accuracy = {accuracy}, precision = {precision}, recall = {recall}, f1 = {f1}")
    

auc = 0.6114306745885694, accuracy = 0.5773722627737227, precision = 0.1259650548557497, recall = 0.6526315789473685, f1 = 0.21117166212534058


In [54]:
# Persist the complete results list, overwriting the checkpoint file written
# during the prediction loop.
with open('results_cot_o1_final.pkl', 'wb') as file:
    pickle.dump(results, file)