In [None]:
import ast
import csv
import os

import pandas as pd
from openai import OpenAI
from tqdm import tqdm


In [None]:
from args import args
from utils.string_helper import get_relevant_sents
from utils.tokenizer import tokenize

In [31]:
class args(args):
    """Notebook-local settings layered on top of the imported `args` class (same name, so it shadows the import)."""
    # The trailing index picks one entry from the list of model names; [6] selects "codellama_34b".
    model_name = ["llama3_8b", "llama3_70b", "mistral_7b", "ibm_granite_20b", "ibm_granite_34b", "codellama_7b", "codellama_34b"][6]
    # NOTE(review): the device settings are not read in the visible cells — presumably used by code that loads a local model; confirm.
    device = "cuda"
    device_auto = True
    # Path templates; "{}" is filled with `model_name` via str.format.
    # Input CSV read by the data-loading cell.
    evaluated_summary_path = "./saved_data/evaluated_summary/evaluated_summary_{}.csv"
    # Not referenced by the visible cells of this notebook.
    summary_path = './saved_data/generated_summaries/generated_summaries_{}.csv'
    # Output CSV that receives the GPT verdicts.
    action_verification_path = "./saved_data/gpt_evaluations/action_verification_entity_level/gpt_action_verification_for_{}.csv"



# Options handed to `tokenize` when a summary is split into sentences.
# NOTE(review): judging by the names these toggle NLTK usage and stop-word /
# punctuation removal — confirm against utils.tokenizer.
config = dict(
    use_nltk=True,
    drop_stopword=True,
    drop_punct=True,
)

In [None]:
# Take the OpenAI key from the OPENAI_API_KEY environment variable so that a real
# secret never has to be written into (and committed with) the notebook.
# The literal placeholder is kept only as a fallback so the cell still executes
# when the variable is unset.
gpt_4 = os.environ.get("OPENAI_API_KEY", "API_KEY")

client = OpenAI(
    api_key=gpt_4,
)

# Chat model used for the verification requests.
gpt_version = "gpt-4o"


In [None]:
# Load the evaluated summaries of the selected model.
summary_path = args.evaluated_summary_path.format(args.model_name)
print(summary_path)
summary_df = pd.read_csv(summary_path)

java_code = summary_df['java_code'].tolist()
summaries = summary_df['summary'].tolist()

# The entity columns are stored as text and turned back into Python objects here.
# ast.literal_eval only accepts literals (lists, tuples, dicts, sets, strings,
# numbers, ...), so a crafted or corrupted CSV cell cannot execute code the way
# eval() would.
unmapped_entities = [ast.literal_eval(x) for x in summary_df['unmapped_entities']]
mapped_entities = [ast.literal_eval(x) for x in summary_df['mapped_entities']]
# summary_df

In [35]:

# Template for one verification request. The placeholders {mapped_entity},
# {relevant_sent} and {CODE} are filled with plain str.replace calls in the
# evaluation loop (not with str.format).
prompt = """
Assume you are an expert in understanding JAVA code. 

Your task is to verify whether the description of '{mapped_entity}' in the given text is correct, incorrect, or irrelevant with respect to the code.
Only output one of the following labels: ["CORRECT", "INCORRECT", "IRRELEVANT"].
Do not provide any other details.

Description:
{relevant_sent}


[CODE]
{CODE}
[/CODE]
"""

In [36]:
def update_file_name(filename, data_folder='./saved_data/gpt_evaluations/action_verification_entity_level/'):
    """
    Builds a "<stem>_run_<N>.csv" name that does not exist yet in the given directory -> Avoids overwriting
    ----------
    Parameters
    ----------
    filename: string
    Specified file name (may include a directory part, which is kept in the result)

    data_folder: string
    Directory whose current contents are checked for already used run numbers
    ----------
    Returns the file name carrying the first unused "_run_<N>" suffix
    """

    extension = ".csv"

    # NOTE(review): the last character of the stem is dropped before the suffix is
    # appended (e.g. "results_.csv" -> "results_run_1.csv"). Kept as-is so existing
    # run files keep matching; confirm this trimming is intended for stems that do
    # not end in a separator.
    prefix = filename.split(extension)[0][:-1] + str("_run_")

    # os.listdir returns bare entry names, so the candidate must be compared by its
    # base name; comparing the full path would never match and every call would
    # return run 1.
    folder_content = set(os.listdir(data_folder))

    run = 1
    while os.path.basename(prefix + str(run) + extension) in folder_content:
        run = run + 1

    return prefix + str(run) + extension

In [None]:
# Output CSV for the verdicts of the selected model; the header cell and the
# evaluation loop both write to this path.
file_name = args.action_verification_path.format(args.model_name)
# NOTE(review): the run-suffixed name is only shown as the cell output — it is not
# assigned back to `file_name`, so later cells still write to (and truncate) the
# unsuffixed path. Confirm whether that is intended.
update_file_name(file_name)

In [38]:

# Column names of the result file; all but the first carry the model name.
fields = [
    "java_code",
    "summary_{}".format(args.model_name),
    'mapped_ents_{}'.format(args.model_name),
    'gpt_pred_{}'.format(args.model_name),
]

# Create (or truncate) the result file and write the header row.
# newline='' lets the csv module control line endings itself (otherwise rows can
# end in "\r\r\n" on Windows); utf-8 makes the file independent of the platform's
# default encoding.
with open(file_name, 'w', newline='', encoding='utf-8') as csvfile:
    csvwriter = csv.writer(csvfile)
    csvwriter.writerow(fields)

In [30]:
# One entry per summary: the inputs plus the list of per-sentence verdicts.
action_verification = []
# Token usage reported by the API for every request (prompt / completion side).
in_tokens = []
out_tokens = []


for i in tqdm(range(len(summaries))):

    current_summary = summaries[i]
    current_java_code = java_code[i]
    current_mapped_ents = mapped_entities[i]

    sent_text, words = tokenize(current_summary.lower(), config) # this gives us a list of sentences

    # Only use the sentences with more than 4 characters
    longer_sent = [x for x in sent_text if len(x) > 4]

    current_verification = {
        "summary": current_summary,
        "java_code": current_java_code,
        "mapped_ents": current_mapped_ents,
        "verification": [],
    }

    for current_mapped_ent in current_mapped_ents:

        relevant_sents = get_relevant_sents(longer_sent, current_mapped_ent)

        # One request per (entity, sentence) pair
        for relevant_sent in relevant_sents:
            # The Java source is substituted last, so placeholder-like text inside
            # the code is never replaced by a later substitution.
            current_prompt = (
                prompt
                .replace("{relevant_sent}", relevant_sent)
                .replace("{mapped_entity}", current_mapped_ent)
                .replace("{CODE}", current_java_code)
            )

            response = client.chat.completions.create(
                messages=[
                    {
                        "role": "user",
                        "content": current_prompt,
                    }
                ],
                model=gpt_version,
                max_tokens=100,
            )

            current_verdict = response.choices[0].message.content

            current_verification["verification"].append(
                {"mapped_ent": current_mapped_ent, "relevant_sent": relevant_sent, "verdict": current_verdict}
            )

            # Maintain token usage data
            in_tokens.append(response.usage.prompt_tokens)
            out_tokens.append(response.usage.completion_tokens)

    action_verification.append(current_verification)

    # Append after every summary so finished rows are already on disk if the run is
    # interrupted. newline='' is required by the csv module so that fields with
    # embedded line breaks (the Java source is likely multi-line) are written
    # correctly; utf-8 avoids platform-dependent encoding errors.
    with open(file_name, 'a', newline='', encoding='utf-8') as csvfile:
        csvwriter = csv.writer(csvfile)
        row_data = [
            current_verification["java_code"],
            current_verification["summary"],
            current_verification["mapped_ents"],
            current_verification["verification"],
        ]
        csvwriter.writerow(row_data)




100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 600/600 [1:07:52<00:00,  6.79s/it]


In [46]:
# action_verification_df = pd.DataFrame()

# action_verification_df['java_code'] = java_code
# action_verification_df['verification_{}'.format(args.model_name)] = verification
# action_verification_df['predicted_hallucinations_{}'.format(args.model_name)] = predicted_hallucinations
# action_verification_df.to_csv('./saved_data/gpt_evaluations/action_verification_entity_level/gpt_predicted_av_entity_level_{}.csv'.format(args.model_name), index=False)
