In [None]:
import html
import os
from typing import Optional

import pandas as pd
from IPython.display import display, HTML

## Load Model Responses

### List of files

In [None]:
# Directory that is scanned for saved model-response files.
models_dir = '../../../Results/LLMs_Responses'
files_list = os.listdir(models_dir)

# Print every entry next to its position in files_list.
for position in range(len(files_list)):
    print(position, files_list[position])

### Choose a model

In [None]:
model_file_name = 'gpt-4o-last_5_topics-ZS_with_citing_prompting-response.csv'

# Substring match against the directory listing. Fail with a readable message
# instead of a bare IndexError when nothing matches.
matching_files = [file for file in files_list if model_file_name in file]
if not matching_files:
    raise FileNotFoundError(
        f'No file in {models_dir!r} has a name containing {model_file_name!r}.'
    )
file_name = matching_files[0]

responses_file_path = os.path.join(models_dir, file_name)

responses_df = pd.read_csv(responses_file_path, encoding='utf-8')
# NOTE(review): eval() executes whatever text is stored in the 'Problem' column.
# Only load files you produced yourself; if the column only ever holds plain
# literals, ast.literal_eval would be a safer parser -- confirm the stored format.
responses_df['Problem'] = responses_df['Problem'].apply(eval)

print(file_name,'\n')
# Show every column when a DataFrame is displayed (applies to the cells below too).
pd.set_option('display.max_columns', None)
responses_df.head(2)

### Optional: compare with a similar model, or the same model with a different configuration

In [None]:
# second_file_index = 17
# second_file_name = files_list[second_file_index]

# responses_file_path_2 = os.path.join(models_dir, second_file_name)

# pd.set_option('display.max_rows', None)

# responses_df_2 = pd.read_csv(responses_file_path_2, encoding='utf-8')

# diff_df = abs(responses_df_2.loc[:, 'Q1':'Q15'] - responses_df.loc[:, 'Q1':'Q15'])
# huge_difference = diff_df.gt(1).sum()  # Adjusted indexing here
# diff_df.head()
# huge_difference


## Explore Results

In [None]:
# Per-column count of cells that are either NaN or an empty string.
is_missing = responses_df.isna()
is_empty_string = responses_df.eq('')
columns_with_none = (is_missing | is_empty_string).sum()
columns_with_none

In [None]:
# pd.set_option('display.max_columns', None)

# Flag rows holding a NaN in any column except the last one.
has_nan = responses_df.iloc[:, :-1].isnull().any(axis=1)
rows_with_none = responses_df[has_nan]
nan_row_labels = rows_with_none.index.tolist()
print('Rows with NaN values:', nan_row_labels)
rows_with_none

In [None]:
# Display individual response
cell_index = 50        # row label of the response to inspect
response_number = 9    # selects the column 'Response_<n>'

text = responses_df.at[cell_index, f'Response_{response_number}']

# Escape the text so that any '<', '>' or '&' in the model output is shown
# literally instead of being interpreted as HTML markup. str() keeps NaN cells
# rendering as 'nan', exactly as str.format did before.
display(HTML("<div style='white-space: pre-wrap;'>{}</div>".format(html.escape(str(text)))))

In [None]:
# Row labels whose 'Problem' entry is non-empty.
has_problem = responses_df['Problem'].map(len) > 0
indices_with_problems = responses_df[has_problem].index.tolist()
print(indices_with_problems)

In [None]:
if indices_with_problems:
    index_with_problem = 36
    # The hard-coded row may not be flagged in the currently loaded file; fall
    # back to the first flagged row so the lookup below cannot hit an empty list.
    if index_with_problem not in indices_with_problems:
        index_with_problem = indices_with_problems[0]
    responses_with_problem_list = list(responses_df.loc[index_with_problem, 'Problem'])
    print(responses_with_problem_list)

    # Show the first flagged response of that row.
    response_with_problem = responses_with_problem_list[0]
    text = responses_df.loc[index_with_problem, f'Response_{response_with_problem}']
    # Escape so markup characters in the model output are displayed verbatim.
    display(HTML("<div style='white-space: pre-wrap;'>{}</div>".format(html.escape(str(text)))))

In [None]:
# Display responses for a selected video
index_to_display = 207
for question_num in range(1, 16):
    text = responses_df.loc[index_to_display,f'Response_{question_num}']

    print(f'Q{question_num}:', end=' ')
    # Escape so markup characters in the model output are displayed verbatim
    # rather than being interpreted as HTML.
    display(HTML("<div style='white-space: pre-wrap;'>{}</div>".format(html.escape(str(text)))))
    print()

Remove the prompts from the responses if they have not been removed already, then re-extract the scores

In [None]:
import re

def remove_prompt_from_response(response: str) -> str:
    '''
    Strip the echoed prompt from an LLM response.

    Everything up to and including the first occurrence of 'Score:' is dropped.

    Args:
        response: Raw response text. Non-string values (e.g. None, or NaN read
            from an empty CSV cell) are passed through untouched.

    Returns:
        The text following the first 'Score:', or the input unchanged when the
        marker is absent or the input is not a string.
    '''
    if not isinstance(response, str):
        # Missing cells are not text; leave them for the caller to handle.
        return response
    phrase = 'Score:'
    _, marker, remainder = response.partition(phrase)
    # partition() yields an empty separator when the phrase was not found.
    return remainder if marker else response

def extract_rating(response: str, rating_scale) -> Optional[int]:
    '''
    Extract the rating digit from an LLM response.

    The first character that is a valid rating for the given scale is returned,
    wherever it occurs in the text. A digit that is part of a larger number
    also counts (e.g. '10' yields 1 on either scale), so the prompt should have
    been removed from the response beforehand.

    Args:
        response: Response text. Non-string values (e.g. None, or NaN read from
            an empty CSV cell) are treated as containing no rating.
        rating_scale: 5 to accept the digits 1-5, or 1 to accept the digits 0-1.

    Returns:
        int or None: The extracted rating if found, otherwise None.

    Raises:
        ValueError: If rating_scale is neither 5 nor 1.
    '''
    if rating_scale == 5:
        pattern = r'([1-5])'
    elif rating_scale == 1:
        pattern = r'([0-1])'
    else:
        raise ValueError('rating_scale should be either 5 or 1.')
    if not isinstance(response, str):
        # re.search would raise TypeError on NaN/None; report "no rating" instead.
        return None
    match = re.search(pattern, response)  # First in-range digit anywhere in the text
    return int(match.group(1)) if match else None

# Strip the prompt from every response cell in place and refresh the matching
# score column from the cleaned text.
for row in range(len(responses_df)):
    for question in range(1, 16):
        response_col = f'Response_{question}'
        cleaned = remove_prompt_from_response(responses_df.at[row, response_col])
        responses_df.at[row, response_col] = cleaned
        responses_df.at[row, f'Q{question}'] = extract_rating(cleaned, rating_scale=5)

responses_df.head()

Save the updated DataFrame after updating the responses and scores

In [None]:
# Write the cleaned responses and scores to <models_dir>/<model_file_name>,
# without the DataFrame index.
csv_output_file = os.path.join(models_dir, model_file_name)
responses_df.to_csv(path_or_buf=csv_output_file, encoding='utf-8', index=False)