In [1]:
import os 
import time
import requests  
import json  
import openai
import numpy as np
from openai import OpenAI
from sklearn.metrics import precision_score, recall_score, f1_score

%reload_ext autoreload
%autoreload 2

In [2]:



def read_prompt_list(path):
    """Load the saved list of prompts from a text file.

    input
        path, string path of the file to read; prompts are separated by
        three consecutive newline characters
    output
        list of prompt strings in file order (an empty file yields [''])
    """
    delimiter = '\n\n\n'
    with open(path, 'r') as handle:
        return handle.read().split(delimiter)





def save_completion_list(path, completion_list):
    """Save the LLM completions to a text file, separated by three newlines.

    input
        path, string path of the file to create or overwrite
        completion_list, list of completion strings, from the first requirement/sentence in test dataset to the last one

    The separator is written between completions only (not after the last one),
    so an empty list produces an empty file. Line endings are always '\\n'.

    NOTE(review): a completion that itself contains three consecutive newlines
    cannot be told apart from the separator when the file is read back.
    """
    with open(path, 'w', newline='\n') as file:
        file.write("\n\n\n".join(completion_list))


def read_completion_list(path):
    """Load the saved list of completions from a text file.

    input
        path, string path of the file to read; completions are separated by
        three consecutive newline characters
    output
        list of completion strings in file order (an empty file yields [''])
    """
    with open(path, 'r') as source:
        raw_text = source.read()
    return raw_text.split('\n\n\n')



def process_completion_list(completion_list):
    """Post-processing hook for raw LLM completions; currently a pass-through.

    input
        completion_list, list of completions as returned by the LLM
    output
        the same list object, unchanged

    NOTE(review): no normalisation (e.g. stripping whitespace) is done here, so
    downstream exact-match metrics see the completions exactly as returned.
    """
    return completion_list


def get_evaluation_results(ground_truth, processed_completion_list):
    """Compute and print weighted precision, recall and F1 for the completions.

    input
        ground_truth: list, true requirement classifications
        processed_completion_list: list, completions from LLMs
    output
        tuple (f1, precision, recall), each averaged with average='weighted'

    All three metrics use zero_division=0 so that a label that is never
    predicted (or never present) contributes 0 consistently. Previously
    precision alone treated that case as 1, which inflated the weighted
    precision relative to recall and F1.
    """
    metric_options = {"zero_division": 0, "average": "weighted"}

    # Calculate precision, recall, and F1 score with identical settings
    precision = precision_score(ground_truth, processed_completion_list, **metric_options)
    recall = recall_score(ground_truth, processed_completion_list, **metric_options)
    f1 = f1_score(ground_truth, processed_completion_list, **metric_options)

    # Print the results
    print("F1 Score:", f1 )
    print("Precision:", precision)
    print("Recall:", recall, "\n###########\n\n")

    return f1, precision, recall



def precision_recall_f1(ground_truth, predictions):
    '''
    given two list of binary values, e.g. functional, non-functional,
    calculate their precision, recall, and F1 score respectively.

    input
        ground_truth: list of true labels, "Functional" or "Non-functional"
        predictions: list of predicted labels, same length as ground_truth
    output
        tuple (precision_functional, recall_functional, f1_functional,
               precision_non_functional, recall_non_functional, f1_non_functional)
        A metric whose denominator is zero is reported as 0.

    Labels are compared by exact string match. A prediction that is not exactly
    the class label (extra whitespace, an explanation, another label) counts as
    a miss for the true class, i.e. a false negative, instead of being ignored.
    '''

    assert len(ground_truth) == len(predictions), "The length of ground truth and predictions must be the same."

    def calculate_metrics(true_positive, false_positive, false_negative):
        precision = true_positive / (true_positive + false_positive) if (true_positive + false_positive) != 0 else 0
        recall = true_positive / (true_positive + false_negative) if (true_positive + false_negative) != 0 else 0
        f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) != 0 else 0
        return precision, recall, f1_score

    def count_outcomes(label):
        # One pass per class: TP/FP depend on what was predicted, FN on what was missed.
        true_positive = false_positive = false_negative = 0
        for gt, pred in zip(ground_truth, predictions):
            if pred == label:
                if gt == label:
                    true_positive += 1
                else:
                    false_positive += 1
            elif gt == label:
                # Any other prediction for a true `label` item is a false negative.
                false_negative += 1
        return true_positive, false_positive, false_negative

    functional_metrics = calculate_metrics(*count_outcomes("Functional"))
    non_functional_metrics = calculate_metrics(*count_outcomes("Non-functional"))

    return (*functional_metrics, *non_functional_metrics)

# GPT-4o

In [3]:
# API key for the Azure OpenAI endpoint, read from the environment.
# Raises KeyError immediately if AI_ATTACK_API_KEY is not set.
api_key = os.environ["AI_ATTACK_API_KEY"]  

In [4]:

# Define the API URL and headers

def call_ai_attack_gpt(prompt, timeout=60):
    """Send one prompt to the chat-completions endpoint and return the reply text.

    input
        prompt, string sent as the user message
        timeout, seconds to wait for the HTTP request (default 60); without a
        timeout a stalled connection would block the notebook indefinitely
    output
        string, message content of the first choice in the response
    raises
        requests.exceptions.RequestException on connection errors, timeouts
        and non-2xx status codes
        KeyError / IndexError if the response body lacks choices[0].message.content
    """
    headers = {
        "Authorization": api_key,  # module-level key read from the environment
        "Content-Type": "application/json",
    }

    # Request payload. temperature 0 keeps the classification output as
    # deterministic as the service allows.
    # NOTE(review): the system message is listed after the user message; the
    # order is kept unchanged so results stay comparable with earlier runs.
    data = {
        "messages": [
            {'role': 'user', 'content': prompt },
            {'role': 'system', 'content': "You are a senior software engineer who is experienced in software requirement classification! "}
        ],
        'temperature': 0,
    }

    # NOTE(review): this cell sits under the "GPT-4o" heading, but the deployment
    # named in the URL is gpt-35-turbo-16k — confirm which model produced the
    # completions saved under gpt_4o/.
    url = "https://openai-aiattack-001333-uksouth-01-freeexperiment.openai.azure.com/openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-10-01-preview"

    # Make the POST request
    response = requests.post(url, headers=headers, json=data, timeout=timeout)

    # Surface HTTP errors with their status code instead of failing later
    # with an uninformative KeyError on "choices".
    response.raise_for_status()

    return response.json()["choices"][0]["message"]["content"]





def get_completion(prompt_list, save_path, max_retries=10, retry_delay=2):
    """Query the LLM for every prompt and save the completions to save_path.

    Each prompt is sent with call_ai_attack_gpt and retried when the call raises.
    If a prompt still fails after max_retries attempts, processing stops at that
    prompt, so the saved completions always correspond one-to-one to the leading
    prompts (no silently missing entries in the middle of the list).

    input
        prompt_list, list of prompt strings
        save_path, string path handed to save_completion_list
        max_retries, maximum number of attempts per prompt (default 10)
        retry_delay, seconds to wait between attempts (default 2)
    """

    start_time = time.time()

    completion_list = []
    for i, prompt in enumerate(prompt_list):
        last_error = None
        for attempt in range(max_retries):
            try:
                completion = call_ai_attack_gpt( prompt )
            except Exception as error:
                # Exception (not a bare except) so that interrupting the kernel
                # still stops the run instead of being retried.
                last_error = error
                if attempt + 1 < max_retries:
                    time.sleep(retry_delay)  # pause before the next attempt
            else:
                completion_list.append(completion)
                if i % 50 == 0:
                    print(i, completion)
                break
        else:
            # All attempts failed: report the prompt index and stop.
            print("{}th prompt. At least one prompt did not get the result.".format(i))
            print("last error: {!r}".format(last_error))
            break

    save_completion_list(save_path, completion_list)

    # calculate the execution time
    end_time = time.time()
    print("execution time: {}".format(end_time - start_time), "second")



## 10 shots

In [8]:
# 10-shot PROMISE `_bi` prompts: run folds 1-10 and save one completion file per fold.
for fold in range(1, 11):
    path_in = f"/Users/yongjiantang/Desktop/tang/code/re_genai/data/cross_validation_prompt_label/prompt_promise_tfidf_10_{fold}_bi.txt"
    prompt_list = read_prompt_list(path_in)
    save_path = f"/Users/yongjiantang/Desktop/tang/code/re_genai/completions/gpt_4o/cv/cross_v_completion_promise_tfidf_10_{fold}_bi.txt"
    get_completion(prompt_list, save_path)


0 Non-functional
50 Functional
execution time: 22.20600199699402 second
0 Non-functional
50 Functional
execution time: 21.425218105316162 second
0 Non-functional
50 Non-functional
execution time: 20.812497854232788 second
0 Non-functional
50 Non-functional
execution time: 20.6143901348114 second
0 Non-functional
50 Functional
execution time: 21.11317491531372 second
0 Functional
50 Functional
execution time: 21.227094888687134 second
0 Non-functional
50 Functional
execution time: 21.214984893798828 second
0 Functional
50 Functional
execution time: 21.33370876312256 second
0 Non-functional
50 Functional
execution time: 21.394917011260986 second
0 Non-functional
50 Non-functional
execution time: 21.37976598739624 second


## 20 shots

In [10]:
# 20-shot PROMISE `_bi` prompts: run folds 1-10 and save one completion file per fold.
for fold in range(1, 11):
    path_in = f"/Users/yongjiantang/Desktop/tang/code/re_genai/data/cross_validation_prompt_label/prompt_promise_tfidf_20_{fold}_bi.txt"
    prompt_list = read_prompt_list(path_in)
    save_path = f"/Users/yongjiantang/Desktop/tang/code/re_genai/completions/gpt_4o/cv/cross_v_completion_promise_tfidf_20_{fold}_bi.txt"
    get_completion(prompt_list, save_path)

0 Non-functional
50 Functional
execution time: 21.412516117095947 second
0 Non-functional
50 Functional
execution time: 22.830445051193237 second
0 Non-functional
50 Non-functional
execution time: 21.09874391555786 second
0 Non-functional
50 Functional
execution time: 21.98013210296631 second
0 Non-functional
50 Functional
execution time: 21.98577117919922 second
0 Functional
50 Functional
execution time: 21.640295028686523 second
0 Non-functional
50 Functional
execution time: 21.560828924179077 second
0 Functional
50 Functional
execution time: 21.148543119430542 second
0 Non-functional
50 Functional
execution time: 20.703577280044556 second
0 Non-functional
50 Non-functional
execution time: 21.363319158554077 second


## 40 shots

In [12]:
# 40-shot PROMISE `_bi` prompts: run folds 1-10 and save one completion file per fold.
for fold in range(1, 11):
    path_in = f"/Users/yongjiantang/Desktop/tang/code/re_genai/data/cross_validation_prompt_label/prompt_promise_tfidf_40_{fold}_bi.txt"
    prompt_list = read_prompt_list(path_in)
    save_path = f"/Users/yongjiantang/Desktop/tang/code/re_genai/completions/gpt_4o/cv/cross_v_completion_promise_tfidf_40_{fold}_bi.txt"
    get_completion(prompt_list, save_path)

0 Non-functional
50 Functional
execution time: 25.40230894088745 second
0 Non-functional
50 Functional
execution time: 23.034453868865967 second
0 Functional
50 Non-functional
execution time: 22.038060903549194 second
0 Non-functional
50 Functional
execution time: 22.065366983413696 second
0 Non-functional
50 Functional
execution time: 21.59586811065674 second
0 Functional
50 Functional
execution time: 21.810037851333618 second
0 Non-functional
50 Functional
execution time: 21.800164937973022 second
0 Functional
50 Functional
execution time: 23.163107872009277 second
0 Non-functional
50 Functional
execution time: 21.620661973953247 second
0 Non-functional
50 Non-functional
execution time: 21.713751792907715 second


## 80 shots

In [14]:
# 80-shot PROMISE `_bi` prompts: run folds 1-10 and save one completion file per fold.
for fold in range(1, 11):
    path_in = f"/Users/yongjiantang/Desktop/tang/code/re_genai/data/cross_validation_prompt_label/prompt_promise_tfidf_80_{fold}_bi.txt"
    prompt_list = read_prompt_list(path_in)
    save_path = f"/Users/yongjiantang/Desktop/tang/code/re_genai/completions/gpt_4o/cv/cross_v_completion_promise_tfidf_80_{fold}_bi.txt"
    get_completion(prompt_list, save_path)

0 Non-functional
50 Functional
execution time: 24.6927490234375 second
0 Non-functional
50 Functional
execution time: 24.058088064193726 second
0 Functional
50 Non-functional
execution time: 24.69658088684082 second
0 Non-functional
50 Functional
execution time: 25.27004313468933 second
0 Non-functional
50 Functional
execution time: 23.697745084762573 second
0 Functional
50 Functional
execution time: 24.320533990859985 second
0 Non-functional
50 Functional
execution time: 24.682086944580078 second
0 Functional
50 Functional
execution time: 28.465137004852295 second
0 Non-functional
50 Functional
execution time: 26.21147084236145 second
0 Non-functional
50 Non-functional
execution time: 23.463464975357056 second


## 120 shots

In [16]:
# 120-shot PROMISE `_bi` prompts: run folds 1-10 and save one completion file per fold.
for fold in range(1, 11):
    path_in = f"/Users/yongjiantang/Desktop/tang/code/re_genai/data/cross_validation_prompt_label/prompt_promise_tfidf_120_{fold}_bi.txt"
    prompt_list = read_prompt_list(path_in)
    save_path = f"/Users/yongjiantang/Desktop/tang/code/re_genai/completions/gpt_4o/cv/cross_v_completion_promise_tfidf_120_{fold}_bi.txt"
    get_completion(prompt_list, save_path)

0 Non-functional
50 Functional
execution time: 27.720547199249268 second
0 Non-functional
50 Functional
execution time: 26.945960998535156 second
0 Functional
50 Non-functional
execution time: 26.774640798568726 second
0 Non-functional
50 Functional
execution time: 27.84811520576477 second
0 Non-functional
50 Functional
execution time: 26.421175956726074 second
0 Functional
50 Functional
execution time: 27.02365803718567 second
0 Non-functional
50 Functional
execution time: 27.716442108154297 second
0 Functional
50 Functional
execution time: 26.576441049575806 second
0 Non-functional
50 Functional
execution time: 28.070454835891724 second
0 Non-functional
50 Non-functional
execution time: 28.306514024734497 second


## 160 shots

In [6]:
# 160-shot PROMISE `_bi` prompts: run folds 1-10 and save one completion file per fold.
for fold in range(1, 11):
    path_in = f"/Users/yongjiantang/Desktop/tang/code/re_genai/data/cross_validation_prompt_label/prompt_promise_tfidf_160_{fold}_bi.txt"
    prompt_list = read_prompt_list(path_in)
    save_path = f"/Users/yongjiantang/Desktop/tang/code/re_genai/completions/gpt_4o/cv/cross_v_completion_promise_tfidf_160_{fold}_bi.txt"
    get_completion(prompt_list, save_path)

0 Non-functional
50 Functional
execution time: 31.349101066589355 second
0 Non-functional
50 Functional
execution time: 31.54848885536194 second
0 Non-functional
50 Non-functional
execution time: 31.100898027420044 second
0 Non-functional
50 Functional
execution time: 30.333815097808838 second
0 Non-functional
50 Functional
execution time: 31.071208000183105 second
0 Functional
50 Functional
execution time: 30.215679168701172 second
0 Non-functional
50 Functional
execution time: 31.849251747131348 second
0 Functional
50 Functional
execution time: 31.21995210647583 second
0 Non-functional
50 Functional
execution time: 31.58035373687744 second
0 Non-functional
50 Non-functional
execution time: 31.582047939300537 second


## promise multi-class

In [5]:
# 20-shot PROMISE multi-class (`_mul`) prompts: run folds 1-10 and save one completion file per fold.
for fold in range(1, 11):
    path_in = f"/Users/yongjiantang/Desktop/tang/code/re_genai/data/cross_validation_prompt_label/prompt_promise_tfidf_20_{fold}_mul.txt"
    prompt_list = read_prompt_list(path_in)
    save_path = f"/Users/yongjiantang/Desktop/tang/code/re_genai/completions/gpt_4o/cv/cross_v_completion_promise_tfidf_20_{fold}_mul.txt"
    get_completion(prompt_list, save_path)

0 Usability
50 Functional
execution time: 22.040382862091064 second
0 Usability
50 Usability
execution time: 21.497967958450317 second
0 Operational
50 Performance
execution time: 20.873350858688354 second
0 Performance
50 Functional
execution time: 20.98603630065918 second
0 Availability
50 Functional
execution time: 20.925493001937866 second
0 Usability
50 Functional
execution time: 20.940655946731567 second
0 Portability
50 Functional
execution time: 20.982011795043945 second
0 Look and Feel
50 Functional
execution time: 20.81289315223694 second
0 Usability
50 Maintainability
execution time: 20.884876012802124 second
0 Look and Feel
50 Performance
execution time: 20.49965214729309 second


In [7]:
# 40-shot PROMISE multi-class (`_mul`) prompts: run folds 1-10 and save one completion file per fold.
for fold in range(1, 11):
    path_in = f"/Users/yongjiantang/Desktop/tang/code/re_genai/data/cross_validation_prompt_label/prompt_promise_tfidf_40_{fold}_mul.txt"
    prompt_list = read_prompt_list(path_in)
    save_path = f"/Users/yongjiantang/Desktop/tang/code/re_genai/completions/gpt_4o/cv/cross_v_completion_promise_tfidf_40_{fold}_mul.txt"
    get_completion(prompt_list, save_path)

0 Usability
50 Functional
execution time: 23.29054307937622 second
0 Usability
50 Functional
execution time: 21.48994207382202 second
0 Operational
50 Performance
execution time: 21.797166109085083 second
0 Performance
50 Functional
execution time: 21.561634063720703 second
0 Availability
50 Functional
execution time: 21.80914306640625 second
0 Look and Feel
50 Functional
execution time: 21.915853023529053 second
0 Portability
50 Functional
execution time: 22.103036880493164 second
0 Look and Feel
50 Functional
execution time: 22.1875319480896 second
0 Usability
50 Functional
execution time: 21.282878875732422 second
0 Look and Feel
50 Performance
execution time: 21.44584584236145 second


In [9]:
# 80-shot PROMISE multi-class (`_mul`) prompts: run folds 1-10 and save one completion file per fold.
for fold in range(1, 11):
    path_in = f"/Users/yongjiantang/Desktop/tang/code/re_genai/data/cross_validation_prompt_label/prompt_promise_tfidf_80_{fold}_mul.txt"
    prompt_list = read_prompt_list(path_in)
    save_path = f"/Users/yongjiantang/Desktop/tang/code/re_genai/completions/gpt_4o/cv/cross_v_completion_promise_tfidf_80_{fold}_mul.txt"
    get_completion(prompt_list, save_path)

0 Usability
50 Functional
execution time: 24.665464162826538 second
0 Usability
50 Functional
execution time: 24.200878858566284 second
0 Operational
50 Performance
execution time: 25.22034978866577 second
0 Performance
50 Functional
execution time: 24.175943851470947 second
0 Availability
50 Functional
execution time: 24.25656294822693 second
0 Usability
50 Functional
execution time: 24.97638487815857 second
0 Portability
50 Functional
execution time: 23.307584047317505 second
0 Look and Feel
50 Functional
execution time: 24.673303842544556 second
0 Usability
50 Functional
execution time: 24.342599153518677 second
0 Look and Feel
50 Performance
execution time: 23.902989149093628 second


In [5]:
# 120-shot PROMISE multi-class (`_mul`) prompts: run folds 1-10 and save one completion file per fold.
for fold in range(1, 11):
    path_in = f"/Users/yongjiantang/Desktop/tang/code/re_genai/data/cross_validation_prompt_label/prompt_promise_tfidf_120_{fold}_mul.txt"
    prompt_list = read_prompt_list(path_in)
    save_path = f"/Users/yongjiantang/Desktop/tang/code/re_genai/completions/gpt_4o/cv/cross_v_completion_promise_tfidf_120_{fold}_mul.txt"
    get_completion(prompt_list, save_path)

0 Usability
50 Functional
execution time: 29.8944571018219 second
0 Usability
50 Functional
execution time: 28.409409999847412 second
0 Operational
50 Performance
execution time: 28.737608194351196 second
0 Performance
50 Functional
execution time: 27.638320922851562 second
0 Availability
50 Functional
execution time: 27.178356885910034 second
0 Look and Feel
50 Functional
execution time: 28.630203247070312 second
0 Portability
50 Functional
execution time: 27.674352169036865 second
0 Look and Feel
50 Functional
execution time: 28.74039912223816 second
0 Usability
50 Functional
execution time: 27.8375141620636 second
0 Look and Feel
50 Performance
execution time: 28.155430793762207 second


In [6]:
# 160-shot PROMISE multi-class (`_mul`) prompts: run folds 1-10 and save one completion file per fold.
# NOTE: the recorded run of this cell stopped with FileNotFoundError because the
# fold-1 prompt file for this configuration was not found.
for fold in range(1, 11):
    path_in = f"/Users/yongjiantang/Desktop/tang/code/re_genai/data/cross_validation_prompt_label/prompt_promise_tfidf_160_{fold}_mul.txt"
    prompt_list = read_prompt_list(path_in)
    save_path = f"/Users/yongjiantang/Desktop/tang/code/re_genai/completions/gpt_4o/cv/cross_v_completion_promise_tfidf_160_{fold}_mul.txt"
    get_completion(prompt_list, save_path)

FileNotFoundError: [Errno 2] No such file or directory: '/Users/yongjiantang/Desktop/tang/code/re_genai/data/cross_validation_prompt_label/prompt_promise_tfidf_160_1_mul.txt'

## pure 40 shots

In [21]:
# 40-shot PURE `_bi` prompts: run folds 1-10 and save one completion file per fold.
for fold in range(1, 11):
    path_in = f"/Users/yongjiantang/Desktop/tang/code/re_genai/data/cross_validation_prompt_label/prompt_pure_tfidf_40_{fold}_bi.txt"
    prompt_list = read_prompt_list(path_in)
    save_path = f"/Users/yongjiantang/Desktop/tang/code/re_genai/completions/gpt_4o/cv/cross_v_completion_pure_tfidf_40_{fold}_bi.txt"
    get_completion(prompt_list, save_path)

0 Non-functional
50 Functional
100 Functional
150 Functional
200 Non-functional
250 Functional
300 Functional
350 Functional
400 Functional
450 Functional
500 Non-functional
550 Non-functional
600 Functional
execution time: 238.58899784088135 second
0 Functional
50 Functional
100 Non-functional
150 Functional
200 Functional
250 Functional
300 Functional
350 Functional
400 Functional
450 Non-functional
500 Functional
550 Functional
600 Functional
execution time: 233.31260895729065 second
0 Non-functional
50 Non-functional
100 Functional
150 Functional
200 Non-functional
250 Functional
300 Non-functional
350 Non-functional
400 Functional
450 Functional
500 Functional
550 Non-functional
600 Functional
execution time: 255.65900683403015 second
0 Non-functional
50 Functional
100 Non-functional
150 Functional
200 Functional
250 Functional
300 Functional
350 Functional
400 Functional
450 Functional
500 Functional
550 Non-functional
600 Functional
execution time: 219.40873908996582 second
0 No

# GPT-3.5

In [4]:
# API key for the Azure OpenAI endpoint, read from the environment.
# Raises KeyError immediately if AI_ATTACK_API_KEY is not set.
api_key = os.environ["AI_ATTACK_API_KEY"]  

In [5]:
# Define the API URL and headers

def call_ai_attack_gpt(prompt, timeout=60):
    """Send one prompt to the gpt-35-turbo-16k deployment and return the reply text.

    input
        prompt, string sent as the user message
        timeout, seconds to wait for the HTTP request (default 60); without a
        timeout a stalled connection would block the notebook indefinitely
    output
        string, message content of the first choice in the response
    raises
        requests.exceptions.RequestException on connection errors, timeouts
        and non-2xx status codes
        KeyError / IndexError if the response body lacks choices[0].message.content
    """
    headers = {
        "Authorization": api_key,  # module-level key read from the environment
        "Content-Type": "application/json",
    }

    # Request payload. temperature 0 keeps the classification output as
    # deterministic as the service allows.
    # NOTE(review): the system message is listed after the user message; the
    # order is kept unchanged so results stay comparable with earlier runs.
    data = {
        "messages": [
            {'role': 'user', 'content': prompt },
            {'role': 'system', 'content': "You are a senior software engineer who is experienced in software requirement classification! "}
        ],
        'temperature': 0,
    }

    url = "https://openai-aiattack-001333-uksouth-01-freeexperiment.openai.azure.com/openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-10-01-preview"

    # Make the POST request
    response = requests.post(url, headers=headers, json=data, timeout=timeout)

    # Surface HTTP errors with their status code instead of failing later
    # with an uninformative KeyError on "choices".
    response.raise_for_status()

    return response.json()["choices"][0]["message"]["content"]


def get_completion(prompt_list, save_path, max_retries=10, retry_delay=2):
    """load the prompt_list, get the completion from llm, and save the results.

    Every prompt is sent with call_ai_attack_gpt; a call that raises is retried.
    When a prompt has failed max_retries times the run stops at that prompt, so
    the saved file never has a gap: entry k always belongs to prompt k.

    input
        prompt_list, list of prompt strings
        save_path, string path handed to save_completion_list
        max_retries, maximum number of attempts per prompt (default 10)
        retry_delay, seconds to wait between attempts (default 2)
    """

    start_time = time.time()

    completion_list = []
    for i, prompt in enumerate(prompt_list):
        succeeded = False
        last_error = None
        attempt = 0
        while attempt < max_retries and not succeeded:
            attempt += 1
            try:
                completion = call_ai_attack_gpt( prompt )
                succeeded = True
            except Exception as error:
                # Only ordinary errors are retried; a kernel interrupt
                # (KeyboardInterrupt) propagates and stops the run.
                last_error = error
                if attempt < max_retries:
                    time.sleep(retry_delay)  # pause before the next attempt

        if not succeeded:
            # Retry budget exhausted: report the prompt index and stop.
            print("{}th prompt. At least one prompt did not get the result.".format(i))
            print("last error: {!r}".format(last_error))
            break

        completion_list.append(completion)
        if i % 50 == 0:
            print(i, completion)

    save_completion_list(save_path, completion_list)

    # calculate the execution time
    end_time = time.time()
    print("execution time: {}".format(end_time - start_time), "second")


## 10 shots

In [8]:
# 10-shot PROMISE `_bi` prompts: run folds 1-10 and save one completion file per fold.
for fold in range(1, 11):
    path_in = f"/Users/yongjiantang/Desktop/tang/code/re_genai/data/cross_validation_prompt_label/prompt_promise_tfidf_10_{fold}_bi.txt"
    prompt_list = read_prompt_list(path_in)
    save_path = f"/Users/yongjiantang/Desktop/tang/code/re_genai/completions/gpt_35_turbo/cv/cross_v_completion_promise_tfidf_10_{fold}_bi.txt"
    get_completion(prompt_list, save_path)

0 Non-functional
50 Functional
execution time: 21.328907012939453 second
0 Non-functional
50 Functional
execution time: 21.217073917388916 second
0 Non-functional
50 Non-functional
execution time: 21.37330198287964 second
0 Non-functional
50 Non-functional
execution time: 21.018218278884888 second
0 Non-functional
50 Functional
execution time: 21.084507942199707 second
0 Functional
50 Functional
execution time: 20.804666996002197 second
0 Non-functional
50 Functional
execution time: 21.095298051834106 second
0 Functional
50 Functional
execution time: 22.731213808059692 second
0 Non-functional
50 Functional
execution time: 24.490665197372437 second
0 Non-functional
50 Non-functional
execution time: 21.05338716506958 second


## 20 shots

In [10]:
# 20-shot PROMISE `_bi` prompts: run folds 1-10 and save one completion file per fold.
for fold in range(1, 11):
    path_in = f"/Users/yongjiantang/Desktop/tang/code/re_genai/data/cross_validation_prompt_label/prompt_promise_tfidf_20_{fold}_bi.txt"
    prompt_list = read_prompt_list(path_in)
    save_path = f"/Users/yongjiantang/Desktop/tang/code/re_genai/completions/gpt_35_turbo/cv/cross_v_completion_promise_tfidf_20_{fold}_bi.txt"
    get_completion(prompt_list, save_path)

0 Non-functional
50 Functional
execution time: 21.368990898132324 second
0 Non-functional
50 Functional
execution time: 21.70224094390869 second
0 Non-functional
50 Non-functional
execution time: 21.365911960601807 second
0 Non-functional
50 Functional
execution time: 21.17951798439026 second
0 Non-functional
50 Functional
execution time: 21.645843029022217 second
0 Functional
50 Functional
execution time: 21.63080620765686 second
0 Non-functional
50 Functional
execution time: 22.2969331741333 second
0 Functional
50 Functional
execution time: 21.592032194137573 second
0 Non-functional
50 Functional
execution time: 21.460442304611206 second
0 Non-functional
50 Non-functional
execution time: 21.646267890930176 second


## 40 shots

In [12]:
# 40-shot PROMISE `_bi` prompts: run folds 1-10 and save one completion file per fold.
for fold in range(1, 11):
    path_in = f"/Users/yongjiantang/Desktop/tang/code/re_genai/data/cross_validation_prompt_label/prompt_promise_tfidf_40_{fold}_bi.txt"
    prompt_list = read_prompt_list(path_in)
    save_path = f"/Users/yongjiantang/Desktop/tang/code/re_genai/completions/gpt_35_turbo/cv/cross_v_completion_promise_tfidf_40_{fold}_bi.txt"
    get_completion(prompt_list, save_path)

0 Non-functional
50 Functional
execution time: 22.622123956680298 second
0 Non-functional
50 Functional
execution time: 22.190609216690063 second
0 Non-functional
50 Non-functional
execution time: 21.964789152145386 second
0 Non-functional
50 Functional
execution time: 21.844176054000854 second
0 Non-functional
50 Functional
execution time: 22.260174989700317 second
0 Functional
50 Functional
execution time: 22.502750873565674 second
0 Non-functional
50 Functional
execution time: 21.66742992401123 second
0 Functional
50 Functional
execution time: 21.722078800201416 second
0 Non-functional
50 Functional
execution time: 22.8212149143219 second
0 Non-functional
50 Non-functional
execution time: 21.75950312614441 second


## 80 shots

In [14]:
# 80-shot PROMISE `_bi` prompts: run folds 1-10 and save one completion file per fold.
for fold in range(1, 11):
    path_in = f"/Users/yongjiantang/Desktop/tang/code/re_genai/data/cross_validation_prompt_label/prompt_promise_tfidf_80_{fold}_bi.txt"
    prompt_list = read_prompt_list(path_in)
    save_path = f"/Users/yongjiantang/Desktop/tang/code/re_genai/completions/gpt_35_turbo/cv/cross_v_completion_promise_tfidf_80_{fold}_bi.txt"
    get_completion(prompt_list, save_path)

0 Non-functional
50 Functional
execution time: 24.87195611000061 second
0 Non-functional
50 Functional
execution time: 24.943161964416504 second
0 Functional
50 Non-functional
execution time: 24.37481999397278 second
0 Non-functional
50 Functional
execution time: 25.72359800338745 second
0 Non-functional
50 Functional
execution time: 24.689903020858765 second
0 Functional
50 Functional
execution time: 25.433857917785645 second
0 Non-functional
50 Functional
execution time: 24.19427514076233 second
0 Functional
50 Functional
execution time: 24.66445803642273 second
0 Non-functional
50 Functional
execution time: 24.3054461479187 second
0 Non-functional
50 Non-functional
execution time: 23.954349756240845 second


## 120 shots

In [19]:
# 120-shot PROMISE `_bi` prompts: run folds 1-10 and save one completion file per fold.
for fold in range(1, 11):
    path_in = f"/Users/yongjiantang/Desktop/tang/code/re_genai/data/cross_validation_prompt_label/prompt_promise_tfidf_120_{fold}_bi.txt"
    prompt_list = read_prompt_list(path_in)
    save_path = f"/Users/yongjiantang/Desktop/tang/code/re_genai/completions/gpt_35_turbo/cv/cross_v_completion_promise_tfidf_120_{fold}_bi.txt"
    get_completion(prompt_list, save_path)

0  Non-functional
50  Functional
execution time: 173.13507771492004 second
0  Functional (for the first part), Non-functional (for the second part)
50  Functional
execution time: 172.85049200057983 second
0  Functional
50  Non-functional
execution time: 170.64958596229553 second
0  Non-functional
50  Functional
execution time: 173.25612115859985 second
0  Non-functional
50  Functional
execution time: 172.6915500164032 second
0  Functional
50  Functional
execution time: 169.55049180984497 second
0  Functional
50  Functional
execution time: 172.04254698753357 second
0  Functional
50  Functional
execution time: 171.5107491016388 second
0  Non-functional
50  Functional
execution time: 177.660747051239 second
0  Non-functional
50  Non-functional
execution time: 180.60703587532043 second


## 160 shots

In [23]:
# 160-shot PROMISE `_bi` prompts: run folds 1-10 and save one completion file per fold.
for fold in range(1, 11):
    path_in = f"/Users/yongjiantang/Desktop/tang/code/re_genai/data/cross_validation_prompt_label/prompt_promise_tfidf_160_{fold}_bi.txt"
    prompt_list = read_prompt_list(path_in)
    save_path = f"/Users/yongjiantang/Desktop/tang/code/re_genai/completions/gpt_35_turbo/cv/cross_v_completion_promise_tfidf_160_{fold}_bi.txt"
    get_completion(prompt_list, save_path)

0  Non-functional
50  Functional
execution time: 183.82532787322998 second
0  Functional (Intuitive and self-explanatory is a functional requirement, but the additional requirement about starting the display of Events or Activities within 90 minutes is a non-functional requirement)
50  Functional
execution time: 184.60814118385315 second
0  Functional
50  Non-functional
execution time: 179.90217065811157 second
0  Non-functional
50  Functional
execution time: 178.8980848789215 second
0  Non-functional
50  Functional
execution time: 179.0825982093811 second
0  Functional
50  Functional
execution time: 179.784245967865 second
0  Non-functional
50  Functional
execution time: 182.01335406303406 second
0  Functional
50  Functional
execution time: 207.46271800994873 second
0  Non-functional
50  Functional
execution time: 185.59070301055908 second
0  Non-functional
50  Non-functional
execution time: 183.73860573768616 second


## promise multi-class

In [21]:
# 40-shot PROMISE multi-class (`_mul`) prompts: run folds 1-10 and save one completion file per fold.
for fold in range(1, 11):
    path_in = f"/Users/yongjiantang/Desktop/tang/code/re_genai/data/cross_validation_prompt_label/prompt_promise_tfidf_40_{fold}_mul.txt"
    prompt_list = read_prompt_list(path_in)
    save_path = f"/Users/yongjiantang/Desktop/tang/code/re_genai/completions/gpt_35_turbo/cv/cross_v_completion_promise_tfidf_40_{fold}_mul.txt"
    get_completion(prompt_list, save_path)

0  Usability
50  Functional
execution time: 154.80008506774902 second
0  Usability
50  Functional
execution time: 154.62903475761414 second
0  Functional
50  Performance
execution time: 150.41956186294556 second
0  Performance
50  Functional
execution time: 153.24399781227112 second
0  Availability
50  Operational
execution time: 154.15138602256775 second
0  Functional
50  Functional
execution time: 150.36188101768494 second
0  Operational
50  Functional
execution time: 151.73853397369385 second
0  Functional
50  Functional
execution time: 152.47742795944214 second
0  Usability
50  Functional
execution time: 154.5011351108551 second
0  Look and Feel
50  Performance
execution time: 157.93050479888916 second


In [6]:
# 80-shot PROMISE multi-class (`_mul`) prompts: run folds 1-10 and save one completion file per fold.
for fold in range(1, 11):
    path_in = f"/Users/yongjiantang/Desktop/tang/code/re_genai/data/cross_validation_prompt_label/prompt_promise_tfidf_80_{fold}_mul.txt"
    prompt_list = read_prompt_list(path_in)
    save_path = f"/Users/yongjiantang/Desktop/tang/code/re_genai/completions/gpt_35_turbo/cv/cross_v_completion_promise_tfidf_80_{fold}_mul.txt"
    get_completion(prompt_list, save_path)

0 Usability
50 Functional
execution time: 26.946588039398193 second
0 Usability
50 Functional
execution time: 24.786670923233032 second
0 Operational
50 Performance
execution time: 25.236984968185425 second
0 Performance
50 Functional
execution time: 25.166715145111084 second
0 Availability
50 Functional
execution time: 24.653141975402832 second
0 Usability
50 Functional
execution time: 25.138212203979492 second
0 Portability
50 Functional
execution time: 24.954620122909546 second
0 Look and Feel
50 Functional
execution time: 25.671494007110596 second
0 Usability
50 Functional
execution time: 25.382508754730225 second
0 Look and Feel
50 Performance
execution time: 24.75789523124695 second


In [7]:
# 120-shot PROMISE multi-class (`_mul`) prompts: run folds 1-10 and save one completion file per fold.
for fold in range(1, 11):
    path_in = f"/Users/yongjiantang/Desktop/tang/code/re_genai/data/cross_validation_prompt_label/prompt_promise_tfidf_120_{fold}_mul.txt"
    prompt_list = read_prompt_list(path_in)
    save_path = f"/Users/yongjiantang/Desktop/tang/code/re_genai/completions/gpt_35_turbo/cv/cross_v_completion_promise_tfidf_120_{fold}_mul.txt"
    get_completion(prompt_list, save_path)

0 Usability
50 Functional
execution time: 27.64516520500183 second
0 Usability
50 Functional
execution time: 27.528943300247192 second
0 Operational
50 Performance
execution time: 27.44933319091797 second
0 Performance
50 Functional
execution time: 27.336058855056763 second
0 Availability
50 Functional
execution time: 27.532338857650757 second
0 Look and Feel
50 Functional
execution time: 27.570201873779297 second
0 Portability
50 Functional
execution time: 26.943867206573486 second
0 Look and Feel
50 Functional
execution time: 26.91857671737671 second
0 Usability
50 Functional
execution time: 28.494157791137695 second
0 Look and Feel
50 Performance
execution time: 27.723124980926514 second


In [8]:
# 160-shot PROMISE multi-class (`_mul`) prompts: run folds 1-10 and save one completion file per fold.
# NOTE: the recorded run of this cell stopped with FileNotFoundError because the
# fold-1 prompt file for this configuration was not found.
for fold in range(1, 11):
    path_in = f"/Users/yongjiantang/Desktop/tang/code/re_genai/data/cross_validation_prompt_label/prompt_promise_tfidf_160_{fold}_mul.txt"
    prompt_list = read_prompt_list(path_in)
    save_path = f"/Users/yongjiantang/Desktop/tang/code/re_genai/completions/gpt_35_turbo/cv/cross_v_completion_promise_tfidf_160_{fold}_mul.txt"
    get_completion(prompt_list, save_path)

FileNotFoundError: [Errno 2] No such file or directory: '/Users/yongjiantang/Desktop/tang/code/re_genai/data/cross_validation_prompt_label/prompt_promise_tfidf_160_1_mul.txt'

## pure 40 shots

In [16]:
# 40-shot PURE `_bi` prompts: run folds 1-10 and save one completion file per fold.
for fold in range(1, 11):
    path_in = f"/Users/yongjiantang/Desktop/tang/code/re_genai/data/cross_validation_prompt_label/prompt_pure_tfidf_40_{fold}_bi.txt"
    prompt_list = read_prompt_list(path_in)
    save_path = f"/Users/yongjiantang/Desktop/tang/code/re_genai/completions/gpt_35_turbo/cv/cross_v_completion_pure_tfidf_40_{fold}_bi.txt"
    get_completion(prompt_list, save_path)

0 Non-functional
50 Functional
100 Functional
150 Functional
200 Non-functional
250 Functional
300 Functional
350 Functional
400 Functional
450 Functional
500 Non-functional
550 Non-functional
600 Functional
execution time: 217.94175910949707 second
0 Functional
50 Functional
100 Non-functional
150 Functional
200 Functional
250 Functional
300 Functional
350 Functional
400 Functional
450 Non-functional
500 Functional
550 Functional
600 Functional
execution time: 219.9918200969696 second
0 Non-functional
50 Non-functional
100 Functional
150 Functional
200 Non-functional
250 Functional
300 Non-functional
350 Non-functional
400 Functional
450 Functional
500 Functional
550 Non-functional
600 Functional
execution time: 215.27307271957397 second
0 Non-functional
50 Functional
100 Non-functional
150 Functional
200 Functional
250 Functional
300 Functional
350 Functional
400 Functional
450 Functional
500 Functional
550 Non-functional
600 Functional
execution time: 213.83485078811646 second
0 Non

# Mistral 7B

In [7]:
# Bearer token for the api.siemens.com LLM endpoint, read from the environment.
# Raises KeyError immediately if OPENAI_API_KEY is not set.
# NOTE: this rebinds the same global `api_key` that call_ai_attack_gpt reads, so
# after running this cell the Azure helper would send this key instead.
api_key = os.environ["OPENAI_API_KEY"]  

# Define the API endpoint URL  
def get_response_siemens_api(prompt):  
    # Define the request headers  
    headers = {  
        'accept': 'application/json',  
        'Authorization': 'Bearer {}'.format(api_key),  
        'Content-Type': 'application/json'  
    }  

    # Define the request payload  

    payload = {
    "model": "mistral-7b-instruct",
    "messages": [
        {
            "role": "user",
            "content": prompt,
            "tool_call_id": None
        }
    ],
    "temperature": 0,
    "stream": False,
    "max_tokens": 100,
    "tools": [
        {
            "type": "function",
            "function": {
                "description": "You are a senior software engineer who is experienced in software requirement classification! ",
                "name": "requirement engineering",
                "parameters": {
                    "name": "Siemens"
                }
            }
        }
    ],
    "tool_choice": "auto"
    }
      
    # Make the POST request  
    url = 'https://api.siemens.com/llm/chat/completions'  
    response = requests.post(url, headers=headers, data=json.dumps(payload))  
      
    # Print the response  
    return response.json()["choices"][0]["message"]["content"]






def get_completion(prompt_list, save_path, max_retries=50):
    """Request a completion for every prompt, save the completions, and print the elapsed time.

    input
        prompt_list, list of prompt strings, processed in order
        save_path, string path handed to save_completion_list for the results
        max_retries, maximum number of attempts per prompt (default 50)

    Each prompt is sent with get_response_siemens_api. A failed attempt is reported
    and retried after a 5 second pause. When all attempts for a prompt fail, the
    prompt is reported and NO completion is stored for it, so the saved list is
    then shorter than prompt_list.
    """
    start_time = time.time()

    completion_list = []
    for i, prompt in enumerate(prompt_list):
        for attempt in range(max_retries):
            try:
                completion = get_response_siemens_api(prompt)
            except Exception as error:
                # Exception (not a bare except) so that KeyboardInterrupt still
                # stops the cell; the error itself is shown because not every
                # failure is a connection problem.
                print("{}th prompt, Connection error: try again automatically {} times".format(i, attempt), repr(error))
                time.sleep(5)  # wait 5 seconds before the next attempt
                continue

            completion_list.append(completion)
            if i % 50 == 0:
                # progress output for every 50th prompt
                print(i, completion)
            time.sleep(2)  # pause between successful requests
            break
        else:
            # All attempts failed: report the index of the prompt that was skipped.
            print("{}th prompt. At least one prompt did not get the result.".format(i))

    save_completion_list(save_path, completion_list)

    # calculate the execution time
    end_time = time.time()
    print("execution time: {}".format(end_time - start_time), "second")


def process_completion_list(completion_list):
    """Reduce each raw completion to its first word, used as the predicted label.

    input
        completion_list, list of completion strings returned by the model
    output
        list of strings of the same length: for every completion, an optional leading
        "The answer is" (any letter case, optional colon) is removed, then the first
        whitespace-separated word is taken and periods are stripped from both ends.
        A completion without any word yields "".
    """
    # str.strip('The answer is: ') would remove a *set of characters* from both
    # ends (turning "Performance" into "Performanc"), so the prefix is removed
    # explicitly instead.
    answer_prefix = "the answer is"

    processed_completion_list = []
    for completion in completion_list:
        text = completion.strip()
        if text.lower().startswith(answer_prefix):
            text = text[len(answer_prefix):].lstrip(": ")
        words = text.split()
        processed_completion_list.append(words[0].strip(".") if words else "")
    return processed_completion_list

### 20 shots

In [8]:
# For folds 1..10: read the fold's prompt file and let get_completion
# query the model and store the fold's completions under save_path.
for fold in range(1, 11):
    path_in = f"/Users/yongjiantang/Desktop/tang/code/re_genai/data/cross_validation_prompt_label/prompt_promise_tfidf_20_{fold}_bi.txt"
    prompt_list = read_prompt_list(path_in)
    save_path = f"/Users/yongjiantang/Desktop/tang/code/re_genai/completions/mistral_7b/cv/cross_v_completion_promise_tfidf_20_{fold}_bi.txt"
    get_completion(prompt_list, save_path)

0  Non-functional
50  Functional
execution time: 149.51476788520813 second
0  Non-functional
50  Functional
execution time: 151.27149510383606 second
0  Functional
50  Non-functional
execution time: 145.87419390678406 second
0  Non-functional (Performance)
50  Functional
execution time: 147.21771812438965 second
0  Non-functional
50  Non-functional
execution time: 148.871808052063 second
0  Functional
50  Functional
execution time: 148.24482011795044 second
0  Non-functional
50  Functional
execution time: 146.35002899169922 second
0  Functional
50  Functional
execution time: 152.73409008979797 second
0  Non-functional
50  Functional
execution time: 147.54706001281738 second
0  Non-functional
50  Non-functional
execution time: 147.96601510047913 second


### 40 shots

In [9]:
# For folds 1..10: read the fold's prompt file and let get_completion
# query the model and store the fold's completions under save_path.
for fold in range(1, 11):
    path_in = f"/Users/yongjiantang/Desktop/tang/code/re_genai/data/cross_validation_prompt_label/prompt_promise_tfidf_40_{fold}_bi.txt"
    prompt_list = read_prompt_list(path_in)
    save_path = f"/Users/yongjiantang/Desktop/tang/code/re_genai/completions/mistral_7b/cv/cross_v_completion_promise_tfidf_40_{fold}_bi.txt"
    get_completion(prompt_list, save_path)

0  Non-functional
50  Functional
execution time: 154.3040430545807 second
0  Functional
50  Functional
execution time: 153.4744839668274 second
0  Functional
50  Non-functional
execution time: 151.85587620735168 second
0  Non-functional
50  Functional
execution time: 150.91023707389832 second
0  Non-functional
50  Functional
execution time: 150.7755880355835 second
0  Functional
50  Functional
execution time: 151.80083298683167 second
0  Functional
50  Functional
execution time: 152.3628659248352 second
0  Functional
50  Functional
execution time: 150.84565997123718 second
0  Non-functional
50  Functional
execution time: 162.9347960948944 second
0  Non-functional
50  Non-functional
execution time: 153.98070192337036 second


## PROMISE, 80 shots

In [28]:
# For folds 1..10: read the fold's prompt file and let get_completion
# query the model and store the fold's completions under save_path.
for fold in range(1, 11):
    path_in = f"/Users/yongjiantang/Desktop/tang/code/re_genai/data/cross_validation_prompt_label/prompt_promise_tfidf_80_{fold}_bi.txt"
    prompt_list = read_prompt_list(path_in)
    save_path = f"/Users/yongjiantang/Desktop/tang/code/re_genai/completions/mistral_7b/cv/cross_v_completion_promise_tfidf_80_{fold}_bi.txt"
    get_completion(prompt_list, save_path)

0  Non-functional
22th prompt, Connection error: try again automatically 0 times
22th prompt, Connection error: try again automatically 1 times
50  Functional
execution time: 495.91996812820435 second
0  Functional (for the first part), Non-functional (for the second part)
50  Functional
execution time: 184.09732699394226 second
0  Functional
50  Non-functional
execution time: 172.42126202583313 second
0  Non-functional
50  Functional
execution time: 175.4492688179016 second
0  Non-functional
5th prompt, Connection error: try again automatically 0 times
50  Functional
55th prompt, Connection error: try again automatically 0 times
execution time: 302.4410889148712 second
0  Functional
50  Functional
execution time: 180.9493372440338 second
0  Non-functional
50  Functional
execution time: 177.66189694404602 second
0  Functional
50  Functional
execution time: 181.5362319946289 second
0  Non-functional
50  Functional
execution time: 178.21558809280396 second
0  Non-functional
50  Non-funct

In [31]:
# Collect the F1 value returned by get_evaluation_results for folds 1..10.
f1_list = []
for fold in range(1, 11):
    # saved completions of this fold, reduced to one predicted label each
    path_in = f"/Users/yongjiantang/Desktop/tang/code/re_genai/completions/mistral_7b/cv/cross_v_completion_promise_tfidf_80_{fold}_bi.txt"
    processed_completion_list = process_completion_list(read_completion_list(path_in))
    # label file of the same fold (path_in is reused for it)
    path_in = f"/Users/yongjiantang/Desktop/tang/code/re_genai/data/cross_validation_prompt_label/label_promise_tfidf_80_{fold}_bi.txt"
    ground_truth = read_prompt_list(path_in)
    f1, precision, recall = get_evaluation_results(ground_truth, processed_completion_list)
    f1_list.append(f1)
# Cell output: (mean over all entries, mean over f1_list[6:], i.e. folds 7..10)
(sum(f1_list) / len(f1_list), sum(f1_list[6:]) / len(f1_list[6:]))

F1 Score: 0.8717809704526972
Precision: 0.8798387096774193
Recall: 0.8709677419354839 
###########


F1 Score: 0.8251357811958882
Precision: 0.8799810246679317
Recall: 0.8225806451612904 
###########


F1 Score: 0.8538926129952877
Precision: 0.8926974664679582
Recall: 0.8524590163934426 
###########


F1 Score: 0.8540728089404107
Precision: 0.8671212549462973
Recall: 0.8524590163934426 
###########


F1 Score: 0.8524590163934426
Precision: 0.8565927077444884
Recall: 0.8524590163934426 
###########


F1 Score: 0.8398396685281931
Precision: 0.8883473637571999
Recall: 0.8360655737704918 
###########


F1 Score: 0.9180327868852459
Precision: 0.9224844544940646
Recall: 0.9180327868852459 
###########


F1 Score: 0.9184752549722351
Precision: 0.9312533051295612
Recall: 0.9180327868852459 
###########


F1 Score: 0.9353862058780091
Precision: 0.9445145018915511
Recall: 0.9344262295081968 
###########


F1 Score: 0.8865010736203194
Precision: 0.8996431599773884
Recall: 0.8852459016393442 
####

(np.float64(0.8755576179861728), np.float64(0.9145988303389525))

In [32]:
# Per-fold scores returned by precision_recall_f1, one list per metric and class.
precision_functional_list, recall_functional_list, f1_functional_list = [], [], []
precision_non_functional_list, recall_non_functional_list, f1_non_functional_list = [], [], []

# Only folds 7..10 are evaluated in this cell.
for fold in range(7, 11):
    # saved completions of this fold, reduced to one predicted label each
    path_in = f"/Users/yongjiantang/Desktop/tang/code/re_genai/completions/mistral_7b/cv/cross_v_completion_promise_tfidf_80_{fold}_bi.txt"
    processed_completion_list = process_completion_list(read_completion_list(path_in))
    # label file of the same fold (path_in is reused for it)
    path_in = f"/Users/yongjiantang/Desktop/tang/code/re_genai/data/cross_validation_prompt_label/label_promise_tfidf_80_{fold}_bi.txt"
    ground_truth = read_prompt_list(path_in)
    (
        precision_functional,
        recall_functional,
        f1_functional,
        precision_non_functional,
        recall_non_functional,
        f1_non_functional,
    ) = precision_recall_f1(ground_truth, processed_completion_list)
    precision_functional_list.append(precision_functional)
    recall_functional_list.append(recall_functional)
    f1_functional_list.append(f1_functional)
    precision_non_functional_list.append(precision_non_functional)
    recall_non_functional_list.append(recall_non_functional)
    f1_non_functional_list.append(f1_non_functional)


# Report the arithmetic mean of every metric over the evaluated folds.
print("functional")
print("precision: ", sum(precision_functional_list) / len(precision_functional_list))
print("recall: ", sum(recall_functional_list) / len(recall_functional_list))
print("f1_score: ", sum(f1_functional_list) / len(f1_functional_list))
print("non-functional")
print("precision: ", sum(precision_non_functional_list) / len(precision_non_functional_list))
print("recall: ", sum(recall_non_functional_list) / len(recall_non_functional_list))
print("f1_score: ", sum(f1_non_functional_list) / len(f1_non_functional_list))

functional
precision:  0.8382417429622658
recall:  0.980962643678161
f1_score:  0.9037261709020462
non-functional
precision:  0.9835668103448276
recall:  0.8668541481041481
f1_score:  0.921401595136449


## PROMISE multi-class: 20, 40, and 80 shots

In [10]:
# For folds 1..10: read the fold's prompt file and let get_completion
# query the model and store the fold's completions under save_path.
for fold in range(1, 11):
    path_in = f"/Users/yongjiantang/Desktop/tang/code/re_genai/data/cross_validation_prompt_label/prompt_promise_tfidf_20_{fold}_mul.txt"
    prompt_list = read_prompt_list(path_in)
    save_path = f"/Users/yongjiantang/Desktop/tang/code/re_genai/completions/mistral_7b/cv/cross_v_completion_promise_tfidf_20_{fold}_mul.txt"
    get_completion(prompt_list, save_path)

0  Usability
50  Functional
execution time: 148.8031768798828 second
0  Usability
50  Functional
execution time: 150.08065390586853 second
0  Functional
50  Performance
execution time: 149.95782995224 second
0  Performance
50  Functional
execution time: 146.99602794647217 second
0  Availability
50  Operational
execution time: 146.16474771499634 second
0  Functional
50  Functional
execution time: 145.43574595451355 second
0  Operational
50  Functional
execution time: 145.11185479164124 second
0  Functional
50  Functional
execution time: 146.24473214149475 second
0  Usability
50  Functional
execution time: 147.71568298339844 second
0  Look and Feel
50  Performance
execution time: 146.32494187355042 second


In [11]:
# For folds 1..10: read the fold's prompt file and let get_completion
# query the model and store the fold's completions under save_path.
for fold in range(1, 11):
    path_in = f"/Users/yongjiantang/Desktop/tang/code/re_genai/data/cross_validation_prompt_label/prompt_promise_tfidf_40_{fold}_mul.txt"
    prompt_list = read_prompt_list(path_in)
    save_path = f"/Users/yongjiantang/Desktop/tang/code/re_genai/completions/mistral_7b/cv/cross_v_completion_promise_tfidf_40_{fold}_mul.txt"
    get_completion(prompt_list, save_path)

0  Usability
50  Functional
execution time: 150.47771096229553 second
0  Usability
50  Functional
execution time: 152.19568610191345 second
0  Functional
50  Performance
execution time: 148.36481189727783 second
0  Performance
50  Functional
execution time: 158.73433208465576 second
0  Availability
50  Operational
execution time: 151.42403101921082 second
0  Functional
50  Functional
execution time: 150.14380598068237 second
0  Operational
50  Functional
execution time: 149.75875997543335 second
0  Functional
50  Functional
execution time: 148.4130847454071 second
0  Usability
50  Functional
execution time: 150.87236785888672 second
0  Look and Feel
50  Performance
execution time: 149.97245383262634 second


In [30]:
# For folds 1..10: read the fold's prompt file and let get_completion
# query the model and store the fold's completions under save_path.
for fold in range(1, 11):
    path_in = f"/Users/yongjiantang/Desktop/tang/code/re_genai/data/cross_validation_prompt_label/prompt_promise_tfidf_80_{fold}_mul.txt"
    prompt_list = read_prompt_list(path_in)
    save_path = f"/Users/yongjiantang/Desktop/tang/code/re_genai/completions/mistral_7b/cv/cross_v_completion_promise_tfidf_80_{fold}_mul.txt"
    get_completion(prompt_list, save_path)

0  Usability
50  Functional
execution time: 184.39641213417053 second
0  Usability
50  Functional
execution time: 177.207994222641 second
0  Functional
50  Performance
execution time: 180.95657801628113 second
0  Performance
50  Functional
execution time: 175.95678091049194 second
0  Availability
50  Operational
execution time: 183.98838806152344 second
0  Functional
50  Functional
execution time: 165.65986895561218 second
0  Operational
50  Functional
execution time: 165.09564185142517 second
0  Functional
50  Functional
execution time: 166.05184507369995 second
0  Usability
50  Functional
execution time: 162.44518494606018 second
0  Look and Feel
50  Performance
execution time: 166.39636778831482 second


## PURE, 80 shots

In [32]:
# For folds 1..10: read the fold's prompt file and let get_completion
# query the model and store the fold's completions under save_path.
for fold in range(1, 11):
    path_in = f"/Users/yongjiantang/Desktop/tang/code/re_genai/data/cross_validation_prompt_label/prompt_pure_tfidf_80_{fold}_bi.txt"
    prompt_list = read_prompt_list(path_in)
    save_path = f"/Users/yongjiantang/Desktop/tang/code/re_genai/completions/mistral_7b/cv/cross_v_completion_pure_tfidf_80_{fold}_bi.txt"
    get_completion(prompt_list, save_path)

0  Non-functional
50  Functional
100  Functional
150  Functional
200  Non-functional
250  Functional
300  Functional
350  Functional
400  Functional
450  Functional
500  Functional
550  Functional
600  Functional
execution time: 1747.1701052188873 second
0  Functional
50  Functional
100  Non-functional
150  Functional
200  Functional
250  Functional
300  Functional
350  Functional
400  Functional
450  Non-functional
500  Functional
550  Functional
600  Functional
execution time: 1555.7990281581879 second
0  Non-functional
50  Functional
100  Functional
150  Functional
200  Non-functional
250  Functional
300  Non-functional
350  Non-functional
400  Functional
450  Functional
500  Functional
550  Non-functional
600  Functional
execution time: 1580.573058128357 second
0  Non-functional
50  Functional
100  Non-functional
150  Functional
200  Functional
217th prompt, Connection error: try again automatically 0 times
250  Functional
300  Functional
350  Functional
400  Functional
450  Functi

In [33]:
# Collect the F1 value returned by get_evaluation_results for folds 1..10.
f1_list = []
for fold in range(1, 11):
    # saved completions of this fold, reduced to one predicted label each
    path_in = f"/Users/yongjiantang/Desktop/tang/code/re_genai/completions/mistral_7b/cv/cross_v_completion_pure_tfidf_80_{fold}_bi.txt"
    processed_completion_list = process_completion_list(read_completion_list(path_in))
    # label file of the same fold (path_in is reused for it)
    path_in = f"/Users/yongjiantang/Desktop/tang/code/re_genai/data/cross_validation_prompt_label/label_pure_tfidf_80_{fold}_bi.txt"
    ground_truth = read_prompt_list(path_in)
    f1, precision, recall = get_evaluation_results(ground_truth, processed_completion_list)
    f1_list.append(f1)
# Cell output: (mean over all entries, mean over f1_list[6:], i.e. folds 7..10)
(sum(f1_list) / len(f1_list), sum(f1_list[6:]) / len(f1_list[6:]))

F1 Score: 0.799536582847111
Precision: 0.8501855287569573
Recall: 0.7807519773344351 
###########


F1 Score: 0.7778665010498921
Precision: 0.8360564940459707
Recall: 0.7589628681177978 
###########


F1 Score: 0.5449272989464802
Precision: 0.5743928738865448
Recall: 0.532906501220631 
###########


F1 Score: 0.8018172152143443
Precision: 0.8602374551971326
Recall: 0.782420091324201 
###########


F1 Score: 0.5505600051054597
Precision: 0.5786438242190455
Recall: 0.5378997594542372 
###########


F1 Score: 0.5393427569284499
Precision: 0.5749061736046551
Recall: 0.5258576006031477 
###########


F1 Score: 0.538613254162398
Precision: 0.5741334701819659
Recall: 0.5223285486443382 
###########


F1 Score: 0.5377873715748375
Precision: 0.5684558312079758
Recall: 0.5249699193246148 
###########


F1 Score: 0.5350503792841947
Precision: 0.5760661126837732
Recall: 0.5212598363328501 
###########


F1 Score: 0.5622557047712632
Precision: 0.5877314214966457
Recall: 0.5487408346746004 
########

(np.float64(0.618775706988443), np.float64(0.5434266774481733))

# Llama 3.1 8B

# binary promise