In [None]:
import matplotlib.pyplot as plt
import matplotlib as mpl

In [None]:
import glob
import math
import os

import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
from matplotlib.lines import Line2D
from scipy import stats



In [None]:
# Global matplotlib defaults: font size, axes line width, and automatic tight layout.
plt.rcParams.update({
    'font.size': 10,
    'axes.linewidth': 1,
    "figure.autolayout": True,
})

In [None]:
# Human regret: best available mean reward (max of mu_L, mu_R) minus the observed Outcome.
df = pd.read_csv('human_data/data.csv')

# Rows with Horizon == 5: only Trial 5 is scored
# (presumably the single free-choice trial -- TODO confirm against the data description).
df_h1 = df[(df.Horizon == 5) & (df.Trial == 5)]
r_obs = df_h1.Outcome
r_max = df_h1[["mu_L", "mu_R"]].max(axis=1)
human_regrets_h1 = (r_max - r_obs).to_numpy()

# Rows with Horizon == 10: Trials 5..10 are scored, one regret vector per trial.
human_regrets_h6 = []
for i in range(5, 11):
    df_h6 = df[(df.Horizon == 10) & (df.Trial == i)]
    r_obs = df_h6.Outcome
    r_max = df_h6[["mu_L", "mu_R"]].max(axis=1)
    human_regrets_h6.append((r_max - r_obs).to_numpy())

# Transpose so that axis 0 indexes games and axis 1 indexes the six scored trials.
human_regrets_h6 = np.asarray(human_regrets_h6).T
print(human_regrets_h6.shape)
print(human_regrets_h6.mean(0))
human_data = dict(human_regrets_h1=human_regrets_h1, human_regrets_h6=human_regrets_h6)

In [None]:
def _trial_regrets(df, t):
    """Return ``(regret, random_regret)`` as plain floats for trial ``t`` of one game.

    ``regret`` is the larger of the two arm means minus the mean of the chosen arm
    (``choice == 0`` selects ``mean0``, anything else selects ``mean1``).
    ``random_regret`` is the larger arm mean minus the average of both arm means.
    """
    trial_rows = df[df.trial == t]
    # Pull scalars out explicitly with .iloc[0]; relying on implicit
    # single-element Series -> scalar conversion is deprecated in pandas/NumPy.
    mean0 = float(trial_rows["mean0"].iloc[0])
    mean1 = float(trial_rows["mean1"].iloc[0])
    choice = int(trial_rows["choice"].iloc[0])

    max_reward = max(mean0, mean1)
    chosen_reward = mean0 if choice == 0 else mean1
    random_reward = 0.5 * mean0 + 0.5 * mean1
    return max_reward - chosen_reward, max_reward - random_reward


def get_data(path):
    """Load per-game CSV logs matching ``path`` and compute per-trial regrets.

    Each file is read once. Files with exactly 5 rows are treated as horizon-1
    games and files with exactly 10 rows as horizon-6 games; files of any other
    length are ignored. Regrets are computed for trials ``4 .. df.trial.max()``
    and stored in column ``t - 4``.

    Parameters
    ----------
    path : str
        Glob pattern passed to ``glob.glob``. Each CSV must provide the columns
        ``trial``, ``mean0``, ``mean1`` and ``choice``.

    Returns
    -------
    dict
        ``"regrets_h1"``: array of shape ``(num_h1, 1)``;
        ``"regrets_h6"``: array of shape ``(num_h6, 6)``;
        ``"random_regrets_h6"``: array of shape ``(num_h6, 6)`` holding the regret
        of picking either arm with probability 0.5.

    Notes
    -----
    The game counts, array shapes and per-trial means are printed as a side effect.
    """
    frames_h1 = []
    frames_h6 = []
    for file in glob.glob(path):
        df = pd.read_csv(file)
        if len(df) == 5:
            frames_h1.append(df)
        elif len(df) == 10:
            frames_h6.append(df)

    num_h1 = len(frames_h1)
    num_h6 = len(frames_h6)

    regrets_h1 = np.zeros((num_h1, 1))
    regrets_h6 = np.zeros((num_h6, 6))
    random_regrets_h6 = np.zeros((num_h6, 6))

    print(num_h1)
    print(num_h6)

    for row, df in enumerate(frames_h1):
        for t in range(4, df.trial.max() + 1):
            regret, _ = _trial_regrets(df, t)
            regrets_h1[row, t - 4] = regret

    for row, df in enumerate(frames_h6):
        for t in range(4, df.trial.max() + 1):
            regret, random_regret = _trial_regrets(df, t)
            regrets_h6[row, t - 4] = regret
            random_regrets_h6[row, t - 4] = random_regret

    print(regrets_h6.shape)
    print(regrets_h6.mean(0))
    print(random_regrets_h6.shape)
    print(random_regrets_h6.mean(0))
    results = {"random_regrets_h6": random_regrets_h6, "regrets_h1": regrets_h1, "regrets_h6": regrets_h6}

    return results

In [None]:
# Hex colours keyed by a 1-based index; plotting code looks them up as palette[k].
palette = {
    1: "#ff7f00",  # orange
    2: "#377eb8",  # blue
    3: "#4daf4a",  # green
    4: "#f781bf",  # pink
    5: "#dede00",  # yellow
}

In [None]:
# Legend handles for the temperature comparison: three black style keys
# (square marker, dashed line, solid line) followed by one solid coloured
# line for each of palette[1], palette[2] and palette[3].
temp_custom_lines = [
    Line2D([0], [0], color='black', marker='s', linestyle='None'),
    Line2D([0], [0], color='black', linestyle='--'),
    Line2D([0], [0], color='black', linestyle='-'),  # humans (black)
    *(Line2D([0], [0], color=palette[k], linestyle='-') for k in (1, 2, 3)),
]

In [None]:
# davinci-002
# sample path -- text-davinci-002/.../e*
# One result dict per sampling temperature, loaded in the order 0.0, 0.5, 1.0.
d1, d2, d3 = (
    get_data("PATH TO FILES WITH TEMP 0.0"),
    get_data("PATH TO FILES WITH TEMP 0.5"),
    get_data("PATH TO FILES WITH TEMP 1.0"),
)

In [None]:
def plot_data_multi(d1, d2, d3, h, temp_custom_lines):
    """Plot mean regret per trial for three model conditions next to human data.

    NOTE: this notebook defines several functions named ``plot_data_multi``;
    whichever definition cell was executed last is the one that gets called.

    Parameters
    ----------
    d1, d2, d3 : dict
        Result dicts with keys ``'regrets_h1'`` and ``'regrets_h6'`` (2-D arrays,
        one row per game). ``d1`` must also provide ``'random_regrets_h6'``, whose
        overall mean is drawn as the horizontal "random" baseline. The three
        conditions are coloured ``palette[1]``, ``palette[2]``, ``palette[3]``.
    h : dict
        Human data with keys ``'human_regrets_h1'`` and ``'human_regrets_h6'``.
    temp_custom_lines : list
        Legend handles. Accepted for call compatibility only: this variant draws
        no legend, so the argument is not used.

    Side effects
    ------------
    Sets ``plt.rcParams["figure.figsize"]``, saves the figure to
    ``'FILENAME_OF_PLOT.pdf'`` and shows it.
    """
    plt.rcParams["figure.figsize"] = (3.46327, 3.7)
    random_baseline = d1['random_regrets_h6'].mean()

    def sem(values):
        # Standard error of the mean over games (axis 0): std / sqrt(n).
        # The previous code used mean / sqrt(n), which is not a standard error.
        return values.std(0) / math.sqrt(values.shape[0])

    def draw(regrets_h1, regrets_h6, color, offset, linestyle):
        # Horizon-1: one square marker at trial 1; horizon-6: a line over trials 1..6.
        # `offset` nudges the x positions so model and human error bars do not overlap.
        x_h1 = np.arange(1) + 1 + offset
        x_h6 = np.arange(6) + 1 + offset
        plt.scatter(x_h1, regrets_h1.mean(0), alpha=0.7, marker='s', color=color)
        plt.errorbar(x_h1, regrets_h1.mean(0), alpha=0.7, yerr=sem(regrets_h1), color=color)
        plt.errorbar(x_h6, regrets_h6.mean(0), alpha=0.7, yerr=sem(regrets_h6), color=color,
                     linestyle=linestyle, marker='o')

    plt.axhline(y=random_baseline, color='C3', linestyle='--', alpha=0.7)

    for key, data in enumerate((d1, d2, d3), start=1):
        draw(data['regrets_h1'], data['regrets_h6'], palette[key], -0.1, '--')
    draw(h['human_regrets_h1'], h['human_regrets_h6'], 'black', 0.1, '-')

    plt.text(1.0, random_baseline - 0.35, 'random', color='C3', alpha=0.7, size=10)
    plt.ylabel('Mean regret')

    plt.xlim(0.75, 6.25)
    plt.xlabel('Trials')
    plt.ylim(1, random_baseline + 0.2)
    plt.savefig('FILENAME_OF_PLOT.pdf', bbox_inches='tight')
    plt.show()

In [None]:
# Temperature comparison: d1, d2, d3 versus the human regrets.
plot_data_multi(d1, d2, d3, h=human_data, temp_custom_lines=temp_custom_lines)

Plots for CoT variations

In [None]:
# Legend handles for the CoT comparison: three black style keys followed by
# one solid coloured line per prompt variant.
CoT_custom_lines = [
    Line2D([0], [0], color='black', marker='s', linestyle='None'),
    Line2D([0], [0], color='black', linestyle='--'),
    Line2D([0], [0], color='black', linestyle='-'),  # humans
    # palette[1] = without CoT, palette[2] = quasi CoT, palette[3] = CoT
    *(Line2D([0], [0], color=palette[k], linestyle='-') for k in (1, 2, 3)),
]

In [12]:
def plot_data_multi(d1, d2, d3, h, temp_custom_lines):
    """Plot mean regret per trial for three prompt variants next to human data.

    NOTE: this notebook defines several functions named ``plot_data_multi``;
    whichever definition cell was executed last is the one that gets called.

    Parameters
    ----------
    d1, d2, d3 : dict
        Result dicts with keys ``'regrets_h1'`` and ``'regrets_h6'`` (2-D arrays,
        one row per game). ``d1`` must also provide ``'random_regrets_h6'``, whose
        overall mean is drawn as the horizontal "random" baseline. The three
        conditions are coloured ``palette[1]``, ``palette[2]``, ``palette[3]``.
    h : dict
        Human data with keys ``'human_regrets_h1'`` and ``'human_regrets_h6'``.
    temp_custom_lines : list
        Legend handles. Accepted for call compatibility only: this variant draws
        no legend, so the argument is not used.

    Side effects
    ------------
    Sets ``plt.rcParams["figure.figsize"]``, creates the ``final_figures_3``
    directory if it is missing, saves the figure there as a PDF and shows it.
    """
    plt.rcParams["figure.figsize"] = (3.46327, 3.7)
    random_baseline = d1['random_regrets_h6'].mean()

    def sem(values):
        # Standard error of the mean over games (axis 0): std / sqrt(n).
        # The previous code used mean / sqrt(n), which is not a standard error.
        return values.std(0) / math.sqrt(values.shape[0])

    def draw(regrets_h1, regrets_h6, color, offset, linestyle):
        # Horizon-1: one square marker at trial 1; horizon-6: a line over trials 1..6.
        # `offset` nudges the x positions so model and human error bars do not overlap.
        x_h1 = np.arange(1) + 1 + offset
        x_h6 = np.arange(6) + 1 + offset
        plt.scatter(x_h1, regrets_h1.mean(0), alpha=0.7, marker='s', color=color)
        plt.errorbar(x_h1, regrets_h1.mean(0), alpha=0.7, yerr=sem(regrets_h1), color=color)
        plt.errorbar(x_h6, regrets_h6.mean(0), alpha=0.7, yerr=sem(regrets_h6), color=color,
                     linestyle=linestyle, marker='o')

    plt.axhline(y=random_baseline, color='C3', linestyle='--', alpha=0.7)

    for key, data in enumerate((d1, d2, d3), start=1):
        draw(data['regrets_h1'], data['regrets_h6'], palette[key], -0.1, '--')
    draw(h['human_regrets_h1'], h['human_regrets_h6'], 'black', 0.1, '-')

    plt.text(1.0, random_baseline - 0.35, 'random', color='C3', alpha=0.7, size=10)
    plt.ylabel('Mean regret')

    plt.xlim(0.75, 6.25)
    plt.xlabel('Trials')
    plt.ylim(0, random_baseline + 0.2)

    out_path = 'final_figures_3/davinci-003_CoT_variations_1.pdf'
    # savefig does not create missing directories; make sure the target exists.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    plt.savefig(out_path, bbox_inches='tight')
    plt.show()

In [None]:
# davinci-002
# sample path -- text-davinci-002/.../e*
# One result dict per prompt variant, loaded in the order: without CoT, quasi-CoT, CoT.
d1, d2, d3 = (
    get_data("PATH TO FILES WITHOUT CoT"),
    get_data("PATH TO FILES WITH QUASI-CoT"),
    get_data("PATH TO FILES WITH CoT"),
)

In [None]:
# Plot all three prompt variants: d1 = without CoT, d2 = quasi-CoT, d3 = CoT.
# (Previously d2 was passed twice, so the CoT results in d3 were never drawn.)
plot_data_multi(d1, d2, d3, human_data, CoT_custom_lines)

CoT With Hints

In [None]:
def plot_data_multi(d1, d2, d3, d4, h, temp_custom_lines):
    """Plot mean regret per trial for four prompt variants next to human data.

    NOTE: this notebook defines several functions named ``plot_data_multi`` with
    different signatures; whichever definition cell was executed last is the one
    that gets called.

    Parameters
    ----------
    d1, d2, d3, d4 : dict
        Result dicts with keys ``'regrets_h1'`` and ``'regrets_h6'`` (2-D arrays,
        one row per game). ``d1`` must also provide ``'random_regrets_h6'``, whose
        overall mean is drawn as the horizontal "random" baseline. The four
        conditions are coloured ``palette[1]`` .. ``palette[4]``.
    h : dict
        Human data with keys ``'human_regrets_h1'`` and ``'human_regrets_h6'``.
    temp_custom_lines : list
        Legend handles, paired in order with the fixed labels 'Horizon 1',
        'Horizon 6', 'Humans', 'Quasi CoT', 'CoT', 'CoT-Exploit', 'CoT-Explore'.

    Side effects
    ------------
    Sets ``plt.rcParams["figure.figsize"]``, creates the ``final_figures_3``
    directory if it is missing, saves the figure there as a PDF and shows it.
    """
    plt.rcParams["figure.figsize"] = (3.75, 4.5)
    random_baseline = d1['random_regrets_h6'].mean()

    def sem(values):
        # Standard error of the mean over games (axis 0): std / sqrt(n).
        # The previous code used mean / sqrt(n), which is not a standard error.
        return values.std(0) / math.sqrt(values.shape[0])

    def draw(regrets_h1, regrets_h6, color, offset, linestyle):
        # Horizon-1: one square marker at trial 1; horizon-6: a line over trials 1..6.
        # `offset` nudges the x positions so model and human error bars do not overlap.
        x_h1 = np.arange(1) + 1 + offset
        x_h6 = np.arange(6) + 1 + offset
        plt.scatter(x_h1, regrets_h1.mean(0), alpha=0.7, marker='s', color=color)
        plt.errorbar(x_h1, regrets_h1.mean(0), alpha=0.7, yerr=sem(regrets_h1), color=color)
        plt.errorbar(x_h6, regrets_h6.mean(0), alpha=0.7, yerr=sem(regrets_h6), color=color,
                     linestyle=linestyle, marker='o')

    plt.axhline(y=random_baseline, color='C3', linestyle='--', alpha=0.7)

    for key, data in enumerate((d1, d2, d3, d4), start=1):
        draw(data['regrets_h1'], data['regrets_h6'], palette[key], -0.1, '--')
    draw(h['human_regrets_h1'], h['human_regrets_h6'], 'black', 0.1, '-')

    plt.text(5.0, random_baseline - 0.55, 'random', color='C3', alpha=0.7, size=10)
    plt.ylabel('Mean regret')

    plt.xlim(0.75, 6.25)
    plt.xlabel('Trials')
    plt.ylim(0, 11)
    # Legend sits above the axes, spread over two columns across the full width.
    plt.legend(
        temp_custom_lines,
        ['Horizon 1', 'Horizon 6', 'Humans', 'Quasi CoT', 'CoT', 'CoT-Exploit', 'CoT-Explore'],
        frameon=False, bbox_to_anchor=(0.0, 1.02, 1, 0.2), loc="lower left",
        borderaxespad=0, ncol=2, handlelength=1.5, handletextpad=0.5, mode='expand',
    )

    out_path = 'final_figures_3/gpt-3.5-turbo_CoT_Hints.pdf'
    # savefig does not create missing directories; make sure the target exists.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    plt.savefig(out_path, bbox_inches='tight')
    plt.show()

In [None]:
# Legend handles for the CoT-with-hints figure: three black style keys followed
# by one solid coloured line per prompt variant.
CoT_hints_custom_lines = [
    Line2D([0], [0], color='black', marker='s', linestyle='None'),
    Line2D([0], [0], color='black', linestyle='--'),
    Line2D([0], [0], color='black', linestyle='-'),  # humans
    # palette[1] = quasi CoT, palette[2] = CoT,
    # palette[3] = CoT with exploit hint, palette[4] = CoT with explore hint
    *(Line2D([0], [0], color=palette[k], linestyle='-') for k in (1, 2, 3, 4)),
]


In [None]:
# turbo 3.5
# One result dict per prompt variant, loaded in the order:
# quasi-CoT, CoT, CoT-exploit, CoT-explore.
quasi_cot, cot, cot_exploit, cot_explore = (
    get_data("PATH TO FILES WITH QUASI-COT"),
    get_data("PATH TO FILES WITH COT"),
    get_data("PATH TO FILES WITH COT-EXPLOIT"),
    get_data("PATH TO FILES WITH COT-EXPLORE"),
)

In [None]:
# gpt-3.5-turbo: the four prompt variants versus the human regrets.
plot_data_multi(
    quasi_cot,
    cot,
    cot_exploit,
    cot_explore,
    h=human_data,
    temp_custom_lines=CoT_hints_custom_lines,
)