## Import and Configuration

In [61]:
import os
import re
import csv
import json
import math
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm import tqdm
from scipy import stats
from datetime import datetime
from collections import defaultdict, Counter
from matplotlib.ticker import FuncFormatter
from matplotlib.colors import ListedColormap

In [53]:
# Human questionnaire responses (tab-separated); every row is tagged with the
# group label 'Human' so it can be told apart after merging with model rows.
bigfive_human_data = pd.read_csv('data/bigfive_data.csv', sep='\t').assign(hue='Human')

Unnamed: 0,race,age,engnat,gender,hand,source,country,E1,E2,E3,...,O2,O3,O4,O5,O6,O7,O8,O9,O10,hue
0,3,53,1,1,1,1,US,4,2,5,...,1,3,1,5,1,4,2,5,5,Human
1,13,46,1,2,1,1,US,2,2,3,...,3,3,3,2,3,3,1,3,2,Human
2,1,14,2,2,1,1,PK,5,1,1,...,5,5,1,5,1,5,5,5,5,Human
3,3,19,2,2,1,1,RO,2,5,2,...,3,5,2,4,2,5,2,5,5,Human
4,11,25,2,2,1,2,US,3,1,3,...,1,1,1,3,1,3,1,5,3,Human


In [54]:
### independent 30 instances
# Read the saved questionnaire runs of each model. The files are opened in
# `with` blocks so the handles are closed as soon as parsing finishes.
with open('records/bigfive_gpt4_2023_06_26-01_37_11_PM.json', 'r') as records_file:
    records_gpt4 = json.load(records_file)
with open('records/bigfive_turbo_2023_06_26-02_06_26_AM.json', 'r') as records_file:
    records_turbo = json.load(records_file)


In [55]:
# One table per model, built from the 'choices' entry of its record file and
# tagged with the label used for that model in the charts.
bigfive_model_data = {
    'gpt4': pd.DataFrame(records_gpt4['choices']).assign(hue='ChatGPT-4'),
    'turbo': pd.DataFrame(records_turbo['choices']).assign(hue='ChatGPT-3'),
}

In [56]:
# First column -> second column of the questionnaire file.
with open('data/bigfive.tsv', 'r') as tsvfile:
    questions = {row[0]: row[1] for row in csv.reader(tsvfile, delimiter='\t')}

# Build `keyed`: item id (trait letter + running number, e.g. 'E1') -> keying
# character taken from the last column of the IPIP file.
# The last column is read positionally: character 1 is a 1-based index into
# `dimensions`, character 2 is the keying sign (presumably '+' or '-', e.g.
# "(1+)" -- TODO confirm against the file).
keyed = {}
indices = defaultdict(int)  # how many items have been seen so far per trait
dimensions = 'EACNO'
with open('data/bigfive_IPIP.tsv', 'r') as tsvfile:
    reader = csv.reader(tsvfile, delimiter='\t')
    for row in reader:
        code = row[-1]
        d = dimensions[int(code[1]) - 1]
        v = code[2]
        indices[d] += 1
        k = f'{d}{indices[d]}'
        keyed[k] = v

In [57]:
models = ['gpt4', 'turbo']
# Column position of the first item of each trait in the human table:
# 7, 17, 27, 37, 47 (ten consecutive item columns per trait).
beg_pos = list(range(7, 100, 10))[:5]

# Order in which the trait blocks are laid out in the response tables.
dimensions = 'ENACO'


def _dimension_score(frame, first_col, dimension):
    """Sum the ten item columns of one trait.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table whose columns ``first_col .. first_col + 9`` hold the item
        responses of ``dimension``.
    first_col : int
        Positional index of the trait's first item column.
    dimension : str
        Trait letter used to look up each item's keying in ``keyed``.

    Returns
    -------
    pandas.Series
        Per-row trait score. Items keyed ``'-'`` are reverse-scored as
        ``6 - response`` before summing.
    """
    total = 0
    for j in range(10):
        item = frame.iloc[:, first_col + j]
        if keyed['%s%i' % (dimension, j + 1)] == '-':
            item = 6 - item
        total += item
    return total


# The previous version resolved `records_<model>` through eval() into a
# variable that was never read; that lookup is dropped, and the duplicated
# human/model scoring loops now share the helper above.
for i, d in enumerate(dimensions):
    ### human scores
    bigfive_human_data[d] = _dimension_score(bigfive_human_data, beg_pos[i], d)

    ### model scores (trait i occupies columns i*10 .. i*10+9)
    for model in models:
        bigfive_model_data[model][d] = _dimension_score(bigfive_model_data[model], i * 10, d)

In [58]:
# Stack human and model rows into one table; 'hue' identifies the group.
data = pd.concat(
    [bigfive_human_data, bigfive_model_data['gpt4'], bigfive_model_data['turbo']],
    ignore_index=True,
)
# Mirror the N score (60 - N).
# NOTE(review): presumably this turns an emotional-stability sum into a
# neuroticism score -- confirm against the keying file.
data['N'] = 60 - data['N']
# Preview of the five trait scores per row (the cell's displayed output).
data[[*dimensions, 'hue']]

Unnamed: 0,E,N,A,C,O,hue
0,44,11,46,47,43,Human
1,22,31,35,42,26,Human
2,35,46,38,49,45,Human
3,22,43,37,26,41,Human
4,34,30,44,34,34,Human
...,...,...,...,...,...,...
19774,30,30,30,33,28,ChatGPT-3
19775,30,30,32,32,28,ChatGPT-3
19776,31,30,30,33,29,ChatGPT-3
19777,30,30,32,33,28,ChatGPT-3


In [62]:
from scipy.stats import percentileofscore

# For every trait, take each model's median score and locate it within the
# human score distribution. The medians used to be overwritten on every
# iteration and never reported; they are now collected into a summary table.
percentile_rows = []
for d in dimensions:
    median_gpt4 = np.median(data[data['hue'] == 'ChatGPT-4'][d])
    median_turbo = np.median(data[data['hue'] == 'ChatGPT-3'][d])
    human_data = data[data['hue'] == 'Human'][d]
    percentile_rows.append({
        'dimension': d,
        'median_gpt4': median_gpt4,
        # Percentage of human scores at or below the model's median
        # (percentileofscore's default 'rank' handling of ties).
        'percentile_gpt4': percentileofscore(human_data, median_gpt4),
        'median_turbo': median_turbo,
        'percentile_turbo': percentileofscore(human_data, median_turbo),
    })

model_percentiles = pd.DataFrame(percentile_rows).set_index('dimension')
model_percentiles

In [60]:
import ipywidgets as widgets
from IPython.display import display, clear_output
import matplotlib.pyplot as plt
import numpy as np
from math import pi
import mplcursors

# Fixed color per respondent group so every chart paints a group the same way.
hue_color_mapping = dict([
    ('Human', '#1f77b4'),
    ('ChatGPT-4', '#ff7f0e'),
    ('ChatGPT-3', '#2ca02c'),
])

#bar_chart function
def plot_bar_chart(selected_hues,display_charts):
    """Draw one bar chart per trait comparing the groups' median scores.

    Reads the notebook-level ``data``, ``categories`` and
    ``hue_color_mapping``.

    Parameters
    ----------
    selected_hues : list of str
        Group labels (values of ``data['hue']``) to include; bars follow
        this order.
    display_charts : bool-like
        When falsy, a hint is printed and nothing is drawn.
    """
    if not display_charts:
        print("Select Yes to See Specific Details about Categories")
        return

    medians = (
        data[data['hue'].isin(selected_hues)]
        .groupby('hue')[categories]
        .median()
        .reset_index()
    )
    # An ordered categorical makes the sort follow the caller's order rather
    # than the alphabetical order produced by groupby.
    medians['hue'] = pd.Categorical(medians['hue'], categories=selected_hues, ordered=True)
    medians = medians.sort_values('hue')

    # The bar colors depend only on the group order, not on the trait.
    group_colors = [hue_color_mapping[group] for group in medians['hue']]

    plt.figure(figsize=(15, 10))
    for position, trait in enumerate(categories, start=1):
        plt.subplot(2, 3, position)
        plt.bar(medians['hue'], medians[trait], color=group_colors)
        plt.title(trait)
        plt.ylabel('Median Value')
    plt.tight_layout()
    plt.show()

#boxplot function
def plot_boxplot(selected_hues, display_details):
    """Draw one boxplot per trait showing the score spread of each group.

    Reads the notebook-level ``data``, ``categories`` and
    ``hue_color_mapping``.

    Parameters
    ----------
    selected_hues : list of str
        Group labels (values of ``data['hue']``) to include.
    display_details : bool-like
        When falsy, the function returns immediately without drawing.
    """
    if display_details:
        subset = data[data['hue'].isin(selected_hues)]

        plt.figure(figsize=(15, 10))
        for position, trait in enumerate(categories, start=1):
            plt.subplot(2, 3, position)
            # hue='hue' with legend=False colors each box by group without
            # adding a redundant legend.
            sns.boxplot(
                data=subset,
                x='hue',
                y=trait,
                hue='hue',
                palette=hue_color_mapping,
                legend=False,
            )
            plt.title(f'{trait} Distribution')
        plt.tight_layout()
        plt.show()


#radar chart function
def plot_radar(categories, selected_hues, show_values):
    """Draw a radar (polar) chart with one outline per selected group.

    For each group the median score of every trait is plotted, surrounded by
    a shaded band of two standard deviations on either side. Reads the
    notebook-level ``data`` and ``hue_color_mapping``.

    Parameters
    ----------
    categories : sequence of str
        Trait columns of ``data`` to plot, in clockwise order. These are
        used both for the axis labels and for selecting the data columns
        (previously the data columns came from the global ``dimensions``).
    selected_hues : sequence of str
        Group labels (values of ``data['hue']``) to draw.
    show_values : bool
        When true, each median is written next to its point.
    """
    trait_count = len(categories)
    hue2line = {
        'Human': '--',
        'ChatGPT-4': '-',
        'ChatGPT-3': ':',
    }
    # One spoke per trait; the first angle is repeated to close the outline.
    angles = [n / float(trait_count) * 2 * pi for n in range(trait_count)]
    angles += angles[:1]

    fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True))
    # Orientation belongs to the axes, not to a group: set it once, and also
    # when no group is selected. First trait at the top, going clockwise.
    ax.set_theta_offset(pi / 2)
    ax.set_theta_direction(-1)

    for hue in selected_hues:
        d_scores = data.loc[data['hue'] == hue, list(categories)].agg(['median', 'std'])
        values = np.array(d_scores.loc['median'])
        errors = np.array(d_scores.loc['std']) * 2
        # Repeat the first point so values line up with the closed `angles`.
        values = np.concatenate((values, [values[0]]))
        errors = np.concatenate((errors, [errors[0]]))
        color = hue_color_mapping[hue]

        ax.fill_between(angles, values - errors, values + errors, alpha=0.2, color=color)
        ax.plot(angles, values, linestyle=hue2line[hue], linewidth=3, label=hue, color=color)

        if show_values:
            # Skip the duplicated closing point so no label is drawn twice.
            for angle, value in zip(angles, values[:-1]):
                ax.text(angle, value, f"{value}", horizontalalignment='center',
                        verticalalignment='bottom', color='blue', size=15)

    ax.set_thetagrids([a * 180 / pi for a in angles[:-1]], categories)
    plt.legend(loc='upper right', bbox_to_anchor=(0.1, 0.1))

    plt.show()

# Trait columns, in the order they are laid out around the radar chart.
categories = ['E', 'N', 'A', 'C', 'O']  # The Big Five traits
# Respondent groups the viewer can switch on and off.
hue_options = ['Human', 'ChatGPT-4', 'ChatGPT-3']

# One pre-ticked checkbox per respondent group.
checkboxes = list(map(
    lambda label: widgets.Checkbox(value=True, description=label),
    hue_options,
))
cb_container = widgets.VBox(children=checkboxes)

# Selector for the optional comparison chart drawn below the radar chart.
interactive_feature_dropdown = widgets.Dropdown(
    options=[('No', False), ('Bar Chart', 'bar'), ('Boxplot', 'box')],
    value=False,
    description='Show Differences:',
)

# Output area that receives everything the callback draws.
out = widgets.Output()
@out.capture(clear_output=True, wait=True)
def on_checkbox_change(b):
    """Redraw the charts for the current checkbox and dropdown selection.

    Everything drawn or printed here is captured into ``out``; the decorator
    clears the previous content (waiting for new output, to avoid flicker),
    so no additional ``with out`` block is needed.

    Parameters
    ----------
    b : change notification or None
        The traitlets change passed by ``observe``. ``None`` is accepted for
        the initial manual call.
    """
    selected_hues = [cb.description for cb in checkboxes if cb.value]
    display_charts = interactive_feature_dropdown.value

    if not selected_hues:
        # An empty selection is not allowed: tick the checkbox that was just
        # cleared again. Assigning `value` fires this callback a second time,
        # and that nested call performs the redraw.
        if b is not None and b.owner in checkboxes:
            b.owner.value = True
        return

    if len(selected_hues) == 1:
        # A single group has nothing to be compared with: show its values on
        # the radar chart and skip the comparison charts.
        plot_radar(categories, selected_hues, show_values=True)
        return

    plot_radar(categories, selected_hues, show_values=False)
    if display_charts == 'bar':
        plot_bar_chart(selected_hues, True)
    elif display_charts == 'box':
        plot_boxplot(selected_hues, True)

# Redraw whenever a group is ticked/unticked or another comparison chart is
# chosen; checkboxes are registered first, then the dropdown.
for control in [*checkboxes, interactive_feature_dropdown]:
    control.observe(on_checkbox_change, names='value')

# Controls on the left, captured chart output on the right.
controls = widgets.VBox(checkboxes + [interactive_feature_dropdown])
display(widgets.HBox([controls, out]))

# Render once up front so the output area is not empty before any interaction.
on_checkbox_change(None)


HBox(children=(VBox(children=(Checkbox(value=True, description='Human'), Checkbox(value=True, description='Cha…