In [1]:
import pandas as pd

In [2]:
# Load the saved responses from bfi_to_mini_temp0.json
import json
# JSON text is UTF-8 by specification; pin the encoding so the read does not
# depend on the platform's locale default.
with open('bfi_to_mini_temp0.json', encoding='utf-8') as f:
    data = json.load(f)

In [3]:
def _strip_json_fence(content):
    """Return `content` without a surrounding Markdown code fence, if present.

    Handles fences with or without the ``json`` language tag and tolerates
    leading/trailing whitespace around the fence. Unfenced content is returned
    with surrounding whitespace removed (which `json.loads` ignores anyway).
    """
    text = content.strip()
    # len >= 6 guarantees the opening and closing fences are distinct
    if len(text) >= 6 and text.startswith("```") and text.endswith("```"):
        text = text[3:-3]
        # Drop the optional language tag that follows the opening fence
        if text.startswith("json"):
            text = text[len("json"):]
    return text.strip()


# Parsed JSON payloads, one per choice, in file order
response = []

for obj in data:
    # Each object may carry several choices; every one of them is parsed
    for choice in obj['choices']:
        content = _strip_json_fence(choice['message']['content'])
        # Raises json.JSONDecodeError if the payload is not valid JSON
        response.append(json.loads(content))

In [4]:
# One row per parsed response; presumably each response is a dict of
# adjective -> rating, so the keys become columns (verify against the JSON).
decision_df = pd.DataFrame(response)

In [5]:
# Preview the first five rows of the item-level ratings
decision_df.head()

Unnamed: 0,Bashful,Bold,Careless,Cold,Complex,Cooperative,Creative,Deep,Disorganized,Efficient,...,Systematic,Talkative,Temperamental,Touchy,Uncreative,Unenvious,Unintellectual,Unsympathetic,Warm,Withdrawn
0,2,4,4,1,2,5,4,2,2,4,...,2,5,1,1,4,5,4,1,5,4
1,4,2,4,4,4,4,4,4,4,4,...,4,4,4,4,2,4,4,4,4,4
2,5,3,4,2,4,5,5,4,2,4,...,5,1,1,1,2,9,2,2,5,5
3,6,6,4,4,6,6,6,6,4,4,...,4,6,4,4,4,6,4,4,6,4
4,4,4,2,3,3,4,4,3,1,5,...,5,3,3,4,2,5,2,2,4,4


In [6]:
# Count missing values per column; a nonzero count would mean at least one
# row has no value for that item.
decision_df.isnull().sum()

Bashful           0
Bold              0
Careless          0
Cold              0
Complex           0
Cooperative       0
Creative          0
Deep              0
Disorganized      0
Efficient         0
Energetic         0
Envious           0
Extraverted       0
Fretful           0
Harsh             0
Imaginative       0
Inefficient       0
Intellectual      0
Jealous           0
Kind              0
Moody             0
Organized         0
Philosophical     0
Practical         0
Quiet             0
Relaxed           0
Rude              0
Shy               0
Sloppy            0
Sympathetic       0
Systematic        0
Talkative         0
Temperamental     0
Touchy            0
Uncreative        0
Unenvious         0
Unintellectual    0
Unsympathetic     0
Warm              0
Withdrawn         0
dtype: int64

Great, the data is complete and ready for analysis.

In [7]:
def reverse_score(score, scale_min=1, scale_max=9):
    """Reverse-key a rating on a bounded rating scale.

    The reversed value is ``scale_min + scale_max - score``. With the default
    bounds (1 and 9) this equals ``10 - score``.

    Args:
        score: A single rating, or any object supporting arithmetic with
            numbers (for example a pandas Series of ratings).
        scale_min: Lowest possible rating on the scale.
        scale_max: Highest possible rating on the scale.

    Returns:
        The reversed rating(s). Values are not range-checked.
    """
    return (scale_min + scale_max) - score


def calculate_big_five_scores(df, scale_min=1, scale_max=9):
    """Sum item ratings into five Mini-Marker domain scores per row.

    Each domain is the sum of its eight items, with the items flagged ``True``
    in the key below reverse-scored via `reverse_score` first.

    Args:
        df: DataFrame with one numeric column per Mini-Marker adjective.
        scale_min: Lowest rating on the response scale (used for reversal).
        scale_max: Highest rating on the response scale (used for reversal).

    Returns:
        DataFrame indexed like `df` with one column per domain
        (``miniMarker_simulated_E/A/C/N/O``).

    Raises:
        KeyError: If any required item column is absent from `df`; the message
            lists every missing column.
    """
    # Mapping of dimensions to their items (True = reverse-score the item)
    dimensions = {
        'miniMarker_simulated_E': [('Bashful', True), ('Bold', False), ('Energetic', False), ('Extraverted', False), ('Quiet', True), ('Shy', True), ('Talkative', False), ('Withdrawn', True)],

        'miniMarker_simulated_A': [('Cold', True), ('Cooperative', False), ('Harsh', True), ('Kind', False), ('Rude', True), ('Sympathetic', False), ('Unsympathetic', True), ('Warm', False)],

        'miniMarker_simulated_C': [('Careless', True), ('Disorganized', True), ('Efficient', False), ('Inefficient', True), ('Organized', False), ('Practical', False), ('Sloppy', True), ('Systematic', False)],

        'miniMarker_simulated_N': [('Envious', False), ('Fretful', False), ('Jealous', False), ('Moody', False), ('Relaxed', True), ('Temperamental', False), ('Touchy', False), ('Unenvious', True)],

        'miniMarker_simulated_O': [('Complex', False), ('Deep', False), ('Creative', False), ('Imaginative', False), ('Intellectual', False), ('Philosophical', False), ('Uncreative', True), ('Unintellectual', True)]
    }

    # Fail once, up front, naming every absent column rather than only the first
    missing = [
        item
        for items in dimensions.values()
        for item, _ in items
        if item not in df.columns
    ]
    if missing:
        raise KeyError(f"Missing Mini-Marker item columns: {missing}")

    scores = {}
    for dimension, items in dimensions.items():
        total = 0
        for item, reverse in items:
            ratings = df[item]
            # Whole-column arithmetic; no per-element .apply needed
            if reverse:
                ratings = reverse_score(ratings, scale_min, scale_max)
            # `+` (not sum with skipna) so a missing rating propagates as NaN
            total = total + ratings
        scores[dimension] = total

    # Convert the scores dictionary to a DataFrame for easier viewing
    return pd.DataFrame(scores, index=df.index)

# Score every row of item ratings into the five domain totals
domain_score = calculate_big_five_scores(decision_df)

# Preview the first five rows of domain scores
domain_score.head()

Unnamed: 0,miniMarker_simulated_E,miniMarker_simulated_A,miniMarker_simulated_C,miniMarker_simulated_N,miniMarker_simulated_O
0,47,53,43,24,26
1,38,40,40,36,38
2,28,48,49,20,38
3,44,48,40,34,48
4,37,44,53,31,37


In [8]:
# Place the domain scores next to the item-level ratings (column-wise concat;
# rows are matched on the shared index of decision_df and domain_score)
simulation_df = pd.concat([decision_df, domain_score], axis=1)

# preview the first 3 rows
simulation_df.head(3)

Unnamed: 0,Bashful,Bold,Careless,Cold,Complex,Cooperative,Creative,Deep,Disorganized,Efficient,...,Unenvious,Unintellectual,Unsympathetic,Warm,Withdrawn,miniMarker_simulated_E,miniMarker_simulated_A,miniMarker_simulated_C,miniMarker_simulated_N,miniMarker_simulated_O
0,2,4,4,1,2,5,4,2,2,4,...,5,4,1,5,4,47,53,43,24,26
1,4,2,4,4,4,4,4,4,4,4,...,4,4,4,4,4,38,40,40,36,38
2,5,3,4,2,4,5,5,4,2,4,...,9,2,2,5,5,28,48,49,20,38


In [9]:
# load soto's data
soto_df = pd.read_csv('study1_data_no_simulation.csv')

# pd.concat(axis=1) aligns rows by index label and pads any mismatch with NaN,
# so a differing row count would silently misalign the two tables.
# Presumably row i of the simulation corresponds to row i of Soto's data --
# verify against how the simulation inputs were generated.
if len(soto_df) != len(simulation_df):
    raise ValueError(
        f"Row count mismatch: soto_df has {len(soto_df)} rows but "
        f"simulation_df has {len(simulation_df)}"
    )

# concatenate the two dataframes
df = pd.concat([soto_df, simulation_df], axis=1)

# preview the first 3 rows
df.head(3)

Unnamed: 0,case_id,age,sex,ethnicity,rel_acquaintance,rel_friend,rel_roommate,rel_boygirlfriend,rel_relative,rel_other,...,Unenvious,Unintellectual,Unsympathetic,Warm,Withdrawn,miniMarker_simulated_E,miniMarker_simulated_A,miniMarker_simulated_C,miniMarker_simulated_N,miniMarker_simulated_O
0,1,27.0,M,2.0,,,,,,,...,5,4,1,5,4,47,53,43,24,26
1,2,26.0,M,3.0,,,,,,,...,4,4,4,4,4,38,40,40,36,38
2,3,24.0,F,4.0,,,,,,,...,9,2,2,5,5,28,48,49,20,38


In [10]:
# Save the merged table to CSV; index=False leaves the row index out of the file
df.to_csv('study1_with_simulation_result.csv', index=False)