In [4]:
# autoreload
%load_ext autoreload
%autoreload 2
import pandas as pd

from utils.utils import load_dfs, get_related_questions
import re

In [35]:
def print_question(results, results_metadata, index):
    '''
    Build a printable view of a question and the model answer to the question.

    The row at position ``index`` of ``results`` selects a base question id
    (the part of ``question_id`` before the first underscore) together with a
    ``num_shots`` / ``question_type`` setting. Every related row that shares
    that setting is rendered, with the model answer coloured green when the
    row's ``accuracy`` is truthy and red otherwise.

    Despite its name the function does not print anything; it returns the
    text so the caller can ``print`` it.

    Parameters:
    results: dataframe
        A dataframe of the results. Columns read: question_id, num_shots,
        question_type, model_answer, model_explanation, allow_explanation,
        accuracy.
    results_metadata: dataframe
        A dataframe of the results metadata. Columns read: prompt (plus
        whatever get_related_questions needs).
        NOTE(review): assumed to be row-aligned with ``results``, i.e.
        get_related_questions returns the matching rows in the same order
        for both frames -- confirm against utils.utils.
    index: int
        The positional (iloc) index into ``results``

    Returns:
    str
        One entry per matching row, joined with newlines.

    Raises:
    ValueError
        If the related rows of ``results`` and ``results_metadata`` differ in
        number, so prompts cannot be paired with answers.
    '''
    selected = results.iloc[index]
    base_id = selected['question_id'].split('_')[0]
    num_shots = selected['num_shots']
    question_type = selected['question_type']

    related_results = get_related_questions(results, base_id)
    related_metadata = get_related_questions(results_metadata, base_id)
    if len(related_metadata) != len(related_results):
        raise ValueError(
            f'results and results_metadata are not aligned for question {base_id}: '
            f'{len(related_results)} result rows vs {len(related_metadata)} metadata rows'
        )

    # The metadata frame has no num_shots / question_type columns, so the
    # setting filter is computed on the results and applied positionally to
    # both frames. (Previously the metadata was sliced with
    # range(index, len(results_df)), which mixes a global row position with a
    # local length and yields an empty selection for almost every index.)
    same_setting = (
        (related_results['num_shots'] == num_shots)
        & (related_results['question_type'] == question_type)
    ).to_numpy()
    results_df = related_results.loc[same_setting]
    metadata_df = related_metadata.loc[same_setting]

    model_answers = results_df['model_answer'].tolist()
    accuracy = results_df['accuracy'].tolist()
    model_explanations = None
    if any(results_df['allow_explanation'].tolist()):
        model_explanations = results_df['model_explanation'].tolist()

    # Prompts are stored either as a plain string or as a list whose first
    # element is the prompt text.
    prompts = [
        prompt[0] if isinstance(prompt, list) else prompt
        for prompt in metadata_df['prompt'].tolist()
    ]

    # color the answer text depending on accuracy (ANSI green / red)
    green, red, reset = '\033[92m', '\033[91m', '\033[0m'
    colored_answers = [
        f'{green if acc else red}{answer}{reset}'
        for answer, acc in zip(model_answers, accuracy)
    ]

    if model_explanations:
        # The value shown is the model's answer, so it is labelled
        # "Model Answer" exactly like the branch below.
        return '\n'.join(
            f'{mcq}\nExplanation: {explanation}\nModel Answer: {answer}'
            for mcq, answer, explanation in zip(prompts, colored_answers, model_explanations)
        )
    return '\n'.join(
        mcq.replace('Correct Answer:', f'Model Answer: {answer}')
        for mcq, answer in zip(prompts, colored_answers)
    )


In [46]:
# Load the pickled evaluation results for gpt-4-1106-preview on the sunk_cost task
# and display the frame.
# NOTE(review): read_pickle can execute arbitrary code while unpickling; only load
# files from a trusted source.
results = pd.read_pickle('../llm_evaluation_results/gpt-4-1106-preview/sunk_cost_results.pkl')
results

Unnamed: 0,model,task_name,question_id,domain,difficulty_level,type,model_answer,model_explanation,allow_explanation,probabilities,num_shots,question_type,permuted_answer,accuracy,normalized_accuracy,expected_calibration
0,gpt-4-1106-preview,sunk_cost,395229_0,personal_health;time,4,self,B,,False,"{'A': 0.0, 'B': 0.0}",0,mc,0.0,False,-0.5,0.0
1,gpt-4-1106-preview,sunk_cost,89560_0,medical;money,4,other,B,,False,"{'A': 0.0, 'B': 0.0}",0,mc,1.0,True,0.5,0.0
2,gpt-4-1106-preview,sunk_cost,594383_0,responsible;money,4,self,B,,False,"{'A': 0.0, 'B': 0.0}",0,mc,0.0,False,-0.5,0.0
3,gpt-4-1106-preview,sunk_cost,530883_0,not-responsible;money,4,self,A,,False,"{'A': 0.0, 'B': 0.0}",0,mc,1.0,True,0.5,0.0
4,gpt-4-1106-preview,sunk_cost,139676_0,personal_health;money,4,signal,B,,False,"{'A': 0.0, 'B': 0.0}",0,mc,1.0,True,0.5,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13894,gpt-4-1106-preview,sunk_cost,524871_0,medical;time,4,signal,B,```As a developer who has invested 9 years in ...,True,"{'A': 0.0, 'B': 0.0}",0,sequential-hidden,1.0,True,0.5,0.0
13895,gpt-4-1106-preview,sunk_cost,674764_0,medical;time,4,other,A,```Dr. Lewis should consider the new therapy p...,True,"{'A': 0.0, 'B': 0.0}",0,sequential-hidden,1.0,True,0.5,0.0
13896,gpt-4-1106-preview,sunk_cost,679598_0,tech;money,4,other,B,Pamela is facing a situation where she needs t...,True,"{'A': 0.0, 'B': 0.0}",0,sequential-hidden,1.0,True,0.5,0.0
13897,gpt-4-1106-preview,sunk_cost,694323_0,personal_health;time,4,other,B,"```\nYes, Susan should consider incorporating ...",True,"{'A': 0.0, 'B': 0.0}",0,sequential-hidden,1.0,True,0.5,0.0


In [47]:
# Load the pickled metadata (prompt, permutation, inference time) that accompanies
# the gpt-4-1106-preview sunk_cost results and display the frame.
# NOTE(review): read_pickle can execute arbitrary code while unpickling; only load
# files from a trusted source.
results_metadata = pd.read_pickle('../llm_evaluation_results/gpt-4-1106-preview/sunk_cost_metadata.pkl')
results_metadata

Unnamed: 0,task_name,model_name,question_id,permutation,prompt,inference_time
0,sunk_cost,gpt-4-1106-preview,395229_0,"[1, 0]",[You have been seeing a personal trainer for 1...,2.690928
1,sunk_cost,gpt-4-1106-preview,89560_0,"[0, 1]",[Dr. Steven has invested $95785 into medical e...,0.388752
2,sunk_cost,gpt-4-1106-preview,594383_0,"[1, 0]",[You are the owner of a restaurant chain and a...,0.469965
3,sunk_cost,gpt-4-1106-preview,530883_0,"[1, 0]",[Imagine a predecessor invested $700 million i...,0.597864
4,sunk_cost,gpt-4-1106-preview,139676_0,"[0, 1]","[After dealing with chronic migraines, you hav...",0.466535
...,...,...,...,...,...,...
13894,sunk_cost,gpt-4-1106-preview,524871_0,"[0, 1]",You have been investing your time for 9 years ...,38.816039
13895,sunk_cost,gpt-4-1106-preview,674764_0,"[1, 0]",Dr. Lewis has spent 6 weeks preparing a therap...,17.263126
13896,sunk_cost,gpt-4-1106-preview,679598_0,"[0, 1]",Pamela is the owner of a tech startup that is ...,59.321703
13897,sunk_cost,gpt-4-1106-preview,694323_0,"[0, 1]",Susan has been unable to shed weight despite s...,13.215069


In [69]:
# Show the model's explanation for row label 11882, taken from the incorrect
# 'explanation'-type answers in the 'not-responsible;money' domain.
results.query("accuracy == False and question_type == 'explanation' and domain == 'not-responsible;money'").loc[11882]['model_explanation']

'"""\nThe decision should be based on the principle of sunk costs and marginal analysis. The $60 already invested is a sunk cost and should not factor into the decision-making process. What matters is whether the additional $200 investment will yield a profit given the new projected profit of $240. If the additional investment is less than the revised profit, it would make sense to continue with the project.\n"""\n\nCorrect Answer: B. Continue to invest the remaining $240 in the older technology.'

In [70]:
# Show the prompt stored in the metadata under row label 11882.
results_metadata.loc[11882]['prompt']

"Previously, another manager had decided to invest in a new technology expected to give $300 in profit. To date, $60 have been invested in the project. To complete the project, an additional $200 are required. However, a new, competitor's technology has been introduced, reducing the projected profit to $240. Would you:\nA. Cut your losses and stop the project.\nB. Continue to invest the remaining $240 in the older technology.\n\nBegin by explaining your reasoning in 2-3 sentences, enclosed in triple quotes. After your explanation, select and state the correct answer by writing 'Correct Answer: ' followed by your choice. BEGIN WITH YOUR EXPLANATION AND WRITE THE CORRECT ANSWER AT THE END"

In [27]:
# Load the sunk_cost element dataframes through the project helper.
# NOTE(review): as_dict=True presumably returns the frames keyed by name -- confirm
# in utils.utils.load_dfs.
sunk_cost_dfs = load_dfs('../elements/sunk_cost/', as_dict = True)

../elements/sunk_cost/


In [32]:
# Inspect the num_shots / question_type pair of the first results row as a dict
# (the same two fields print_question reads from its selected row).
results.iloc[0][['num_shots', 'question_type']].to_dict()

{'num_shots': 0, 'question_type': 'mc'}

In [56]:
# Show the prompt at positional row 12667 of the metadata.
results_metadata.iloc[12667]['prompt']

"Suppose you bought unknown shares for $800 and after a week, their value dropped to $645. A friend offers to sell you well-rated shares of the same quantity for $566. Would you rather:\nA. Hold onto the sinking investment you made first.\nB. Sell the first shares at a loss and invest in the better-rated shares with your 645.\n\nBegin by explaining your reasoning in 2-3 sentences, enclosed in triple quotes. After your explanation, select and state the correct answer by writing 'Correct Answer: ' followed by your choice. BEGIN WITH YOUR EXPLANATION AND WRITE THE CORRECT ANSWER AT THE END"

In [55]:
# Show the model's explanation at positional row 12667 of the results.
results.iloc[12667]['model_explanation']

'"""\nThe decision to hold onto the sinking investment or to sell and invest in better-rated shares should be based on the potential for future returns and the quality of the investment. If the original shares are expected to continue losing value and the well-rated shares have a higher potential for appreciation, it would be wise to cut losses and invest in the better option. However, if the original shares have a chance to recover and outperform the well-rated shares, it might be better to hold onto them. The decision should also consider the cost of selling the original shares (such as transaction fees) and the credibility of the friend\'s advice.\n"""\n\nCorrect Answer: B. Sell the first shares at a loss and invest in the better-rated shares with your $645.'

In [52]:
# Render the question selected by positional row 11014 of the results.
# print_question returns a string containing ANSI colour escape codes, so it is
# passed through print() to have the terminal/notebook interpret them instead of
# showing the raw repr.
print(print_question(results=results, results_metadata=results_metadata, index=11014))


