In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


In [2]:
pd.set_option('display.max_columns', None)

In [3]:
# 20240716 re-run as raw file is updated
gpt4o_result = pd.read_excel('raw/result_GPT4o.xlsx')

# 20240716 added
claude_result = pd.read_excel('raw/result_Claude.xlsx')

# 20240716 exclude from the run as already done
gpt35_result = pd.read_excel('raw/result_GPT35.xlsx')
gpt4_result = pd.read_excel('raw/result_GPT4.xlsx')

# 20240716 dropped from the scope
gemini_result = pd.read_excel('raw/result_Gemini.xlsx')
gemini_old_result = pd.read_excel('raw/result_Gemini (proto).xlsx')


In [4]:
# dial the target models to True
run_flag = {'gpt35'       : False,
            'gpt4'        : False,
            'gpt4o'       : True,
            'gemini_old'  : False,
            'gemini'      : False,  #ultra
            'claude'      : True}   #sonnet

In [5]:
gpt4o_result.head()

Unnamed: 0,type,posts_filtered,Row,1st choice,2nd choice,3rd choice,Justification for 1st choice (EI),Justification for 1st choice (NS),Justification for 1st choice (TF),Justification for 1st choice (PJ)
0,INFJ,"'OP, do you use psychotropic medication, like ...",#1,INFJ,INFP,ENFP,The speaker displays a preference for introspe...,The speaker frequently engages in abstract and...,"The speaker's focus on values, harmony, and un...",The speaker exhibits a structured and organize...
1,INFJ,'That is why I LOVE the Disney film Enchanted....,#2,INFP,ISFP,INFJ,Reflects introspection and a preference for so...,Emphasizes emotional depth and abstract expres...,Prioritizes personal values and emotional unde...,Shows a contemplative and open-ended approach ...
2,INFJ,'I love malapropisms. Carmine 'Little Carmine'...,#3,INTP,ENTP,INFJ,Reflects a preference for solitary and introsp...,"Shows strong interest in abstract concepts, th...",Engages in logical analysis and philosophical ...,Displays a mix of structured knowledge and spo...
3,INFJ,'I often listen to music that mirrors my mood....,#1,INFP,INFJ,ENFP,Reflects introspection and a preference for so...,Shows a preference for exploring abstract idea...,Prioritizes personal values and emotional unde...,Demonstrates flexibility and spontaneity in ap...
4,INFJ,'Please ignore this post. I just misread someo...,#2,INFP,ISFP,INFJ,Reflects introspection and a preference for so...,Emphasizes emotional depth and abstract expres...,Prioritizes personal values and emotional unde...,Shows a contemplative and open-ended approach ...


In [6]:
def split_type (row: str, index: int): #j
    splitted = list(row)
    return splitted[index]

In [7]:
def splitting_types_to_columns (cols_list, df):
    naming = [' (EI)', ' (NS)', ' (FT)', ' (JP)']
    for i, col in enumerate(cols_list):
        col_index = df.columns.tolist().index(col)
        for j, name in enumerate(naming):
            new_name = col + name
            value = df[col].apply(lambda row: split_type(row, j))
            df.insert(loc=col_index+j+1, column= new_name, value=value)
    return df

In [8]:
def add_bool (df):
    namings = ['full', '(EI)', '(NS)', '(FT)', '(JP)']
    choices = ['1st ', '2nd ', '3rd ']
    for i, choice in enumerate(choices):
        for j, col in enumerate(namings):
            start_index = (i+1)*(len(namings))
            actual = df.iloc[:,j]
            pred = df.iloc[:,start_index + j]
            bool_series = actual == pred
            new_col_name = 'is matched ' + choice + col
            df[new_col_name] = bool_series
    return df

In [9]:
def generate_df_for_accuracy (df):
    col_names = df.columns.tolist()

    accuracy_check_cols = ['type', '1st choice', '2nd choice', '3rd choice']
    accuracy_check = df[accuracy_check_cols]
    accuracy_check = accuracy_check[~accuracy_check.apply(lambda row: 'na' in row.values, axis=1)]

    accuracy_check = splitting_types_to_columns(accuracy_check_cols, accuracy_check)
    accuracy_check = add_bool(accuracy_check)
    return accuracy_check

In [10]:
def generate_df_for_justification (df):
    selected_cols = ['type', 'posts_filtered', '1st choice', 'Justification for 1st choice (EI)', 'Justification for 1st choice (NS)',	'Justification for 1st choice (TF)', 'Justification for 1st choice (PJ)']
    split_target_cols = ['type', '1st choice']
    new_df = df[selected_cols]
    new_df = new_df[~new_df.apply(lambda row: 'na' in row.values, axis=1)]
    new_df = splitting_types_to_columns (split_target_cols, new_df)

    #----------purify df by changing columns order------------
    popped_data = new_df.pop('posts_filtered')
    index = new_df.columns.tolist().index('Justification for 1st choice (EI)')
    new_df.insert(loc=index, column='posts_filtered', value=popped_data)

    #---------add matched flag b/w original/predicted---------
    col_0 = new_df.columns.tolist()[:index]
    new_df_0 = new_df[col_0]
    new_cols = ['is_matched(full)', 'is_matched(EI)', 'is_matched(NS)', 'is_matched(FT)', 'is_matched(JP)']
    for i, col in enumerate(new_cols):
        new_df_0[col] = new_df_0.iloc[:,i] == new_df_0.iloc[:,i+len(new_cols)]
    new_df = pd.concat([new_df_0, new_df[new_df.columns.tolist()[index:]]], axis = 1)
    
    return new_df

In [11]:
def generate_dfs (df):
    return generate_df_for_accuracy (df), generate_df_for_justification(df)

# gpt4o data transformation

In [12]:
if run_flag['gpt4o'] == True:
    gpt4o_result_accuracy, gpt4o_result_justification = generate_dfs(gpt4o_result)
    print(gpt4o_result_accuracy.shape)
    print(gpt4o_result_justification.shape)

(624, 35)
(624, 20)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df_0[col] = new_df_0.iloc[:,i] == new_df_0.iloc[:,i+len(new_cols)]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df_0[col] = new_df_0.iloc[:,i] == new_df_0.iloc[:,i+len(new_cols)]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df_0[col] = new_df_0.iloc[:,i] == new_df_0.iloc[:,i+len(new

In [13]:
if run_flag['gpt4o'] == True:
    print(gpt4o_result_accuracy['type'].value_counts())

type
INFJ    39
ENTP    39
INTP    39
INTJ    39
ENTJ    39
ENFJ    39
INFP    39
ENFP    39
ISFP    39
ISTP    39
ISFJ    39
ISTJ    39
ESTP    39
ESFP    39
ESTJ    39
ESFJ    39
Name: count, dtype: int64


# gpt4 data transformation

In [14]:
if run_flag['gpt4'] == True:
    gpt4_result_accuracy, gpt4_result_justification = generate_dfs(gpt4_result)
    print(gpt4_result_accuracy.shape)
    print(gpt4_result_justification.shape)

In [15]:
if run_flag['gpt4'] == True:
    print(gpt4_result_accuracy['type'].value_counts())

In [16]:
if run_flag['claude'] == True:
    claude_result_accuracy, claude_result_justification = generate_dfs(claude_result)
    print(claude_result_accuracy.shape)
    print(claude_result_justification.shape)

(624, 35)
(624, 20)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df_0[col] = new_df_0.iloc[:,i] == new_df_0.iloc[:,i+len(new_cols)]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df_0[col] = new_df_0.iloc[:,i] == new_df_0.iloc[:,i+len(new_cols)]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df_0[col] = new_df_0.iloc[:,i] == new_df_0.iloc[:,i+len(new

In [17]:
if run_flag['claude'] == True:
    print(claude_result_accuracy['type'].value_counts())

type
INFJ    39
ENTP    39
INTP    39
INTJ    39
ENTJ    39
ENFJ    39
INFP    39
ENFP    39
ISFP    39
ISTP    39
ISFJ    39
ISTJ    39
ESTP    39
ESFP    39
ESTJ    39
ESFJ    39
Name: count, dtype: int64


# gemini ultra data transformation

In [18]:
if run_flag['gemini'] == True:
    gemini_result_accuracy, gemini_result_justification = generate_dfs(gemini_result)

In [19]:
if run_flag['gemini'] == True:
    gemini_result_accuracy['type'].value_counts()

# gpt3.5 data transformation

In [20]:
if run_flag['gpt35'] == True:
    gpt35_result_accuracy, gpt35_result_justification = generate_dfs(gpt35_result)
    print(gpt35_result_accuracy.shape)
    print(gpt35_result_justification.shape)

# gemini data tranformation

In [21]:
if run_flag['gemini_old'] == True:
    gemini_old_result_accuracy, gemini_old_result_justification = generate_dfs(gemini_old_result)
    print(gemini_old_result_accuracy.shape)
    print(gemini_old_result_justification.shape)

In [22]:
def save_to_excel(variable_name, directory='transformed'):
    if variable_name in globals():
        globals()[variable_name].to_excel(f'{directory}/{variable_name}.xlsx', index=False)
    else:
        print(f"Variable {variable_name} does not exist.")

# Loop through the run flags
for model_name, flag in run_flag.items():
    if flag:
        accuracy_table_name = f"{model_name}_result_accuracy"
        justification_table_name = f"{model_name}_result_justification"
        
        save_to_excel(accuracy_table_name)
        save_to_excel(justification_table_name)

        print(f"saved: {accuracy_table_name}")
        print(f"saved: {justification_table_name}")

saved: gpt4o_result_accuracy
saved: gpt4o_result_justification
saved: claude_result_accuracy
saved: claude_result_justification


"\ngpt4o_result_accuracy.to_excel('transformed/gpt4o_result_accuracy.xlsx', index = False)\ngpt4_result_accuracy.to_excel('transformed/gpt4_result_accuracy.xlsx', index = False)\ngemini_result_accuracy.to_excel('transformed/gemini_result_accuracy.xlsx', index = False)\ngpt35_result_accuracy.to_excel('transformed/gpt35_result_accuracy.xlsx', index = False)\ngemini_old_result_accuracy.to_excel('transformed/gemini_old_result_accuracy.xlsx', index = False)\n\ngpt4o_result_justification.to_excel('transformed/gpt4o_result_justification.xlsx', index = False)\ngpt4_result_justification.to_excel('transformed/gpt4_result_justification.xlsx', index = False)\ngemini_result_justification.to_excel('transformed/gemini_result_justification.xlsx', index = False)\ngpt35_result_justification.to_excel('transformed/gpt35_result_justification.xlsx', index = False)\ngemini_old_result_justification.to_excel('transformed/gemini_old_result_justification.xlsx', index = False)\n"

To assess the accuracy of LLMs MBTI type predictions
- gpt4o_result_accuracy
- gpt4_result_accuracy
- gemini_result_accuracy

To analyze the justification of the 1st choice
- gpt4o_result_justification
- gpt4_result_justification
- gemini_result_justification

To conduct a brief review on hypothesis that older models perform better
- gpt35_result_accuracy
- gemini_old_result_accuracy
- gpt35_result_justification
- gemini_old_result_justification

