In [4]:
import pandas as pd

# load data
df = pd.read_csv('../data/york_data_clean.csv')

df.head()

Unnamed: 0,Start Date,End Date,Response Type,IP Address,Progress,Duration (in seconds),Finished,Recorded Date,Response ID,Recipient Last Name,...,bfi52_mapped,bfi53_mapped,bfi54_mapped,bfi55_mapped,bfi56_mapped,bfi57_mapped,bfi58_mapped,bfi59_mapped,bfi60_mapped,bfi_combined
0,9/23/24 20:58,9/23/24 21:09,0,99.247.239.74,100,692,1,9/23/24 21:09,R_5d45GouOkUfD9rf,,...,I am fairly polite courteous to others.,I am fairly persistent usually work until the ...,I almost never feel depressed blue.,I have little interest in abstract ideas.,I show a fair amount of enthusiasm.,I sometimes assume the best about people.,I sometimes behave irresponsibly.,I am not at all temperamental almost never get...,I am quite original often come up with new ideas.,"I am quite outgoing, sociable. I am not at all..."
1,9/23/24 21:06,9/23/24 21:16,0,70.35.217.132,100,576,1,9/23/24 21:16,R_7h2Xcu888ru6Lap,,...,I am fairly polite courteous to others.,I am fairly persistent usually work until the ...,I sometimes feel depressed blue.,I have some interest in abstract ideas.,I show a fair amount of enthusiasm.,I sometimes assume the best about people.,I rarely behave irresponsibly.,I am quite temperamental often get emotional.,I am moderately original sometimes come up wit...,"I am somewhat outgoing, sociable. I am fairly ..."
2,9/23/24 20:59,9/23/24 21:17,0,70.53.73.20,100,1107,1,9/23/24 21:17,R_3pXno2C5AT8NsOE,,...,I am fairly polite courteous to others.,I am very persistent almost always work until ...,I almost never feel depressed blue.,I have a lot of interest in abstract ideas.,I show a lot of enthusiasm.,I usually assume the best about people.,I almost never behave irresponsibly.,I am moderately temperamental sometimes get em...,I am very original almost always come up with ...,"I am quite outgoing, sociable. I am moderately..."
3,9/23/24 21:04,9/23/24 21:22,0,23.233.10.215,100,1080,1,9/23/24 21:22,R_76kJbfHLQrYBL4X,,...,I am fairly polite courteous to others.,I am somewhat persistent sometimes work until ...,I almost always feel depressed blue.,I have a fair amount of interest in abstract i...,I show almost no enthusiasm.,I rarely assume the best about people.,I rarely behave irresponsibly.,I am quite temperamental often get emotional.,I am moderately original sometimes come up wit...,"I am quite reserved, unsociable. I am moderate..."
4,9/23/24 21:00,9/23/24 21:29,0,72.136.110.98,100,1738,1,9/23/24 21:29,R_1uFF8HZ890wuWdP,,...,I am fairly polite courteous to others.,I am somewhat persistent sometimes work until ...,I almost never feel depressed blue.,I have some interest in abstract ideas.,I show some enthusiasm.,I usually assume the best about people.,I rarely behave irresponsibly.,I am moderately temperamental sometimes get em...,I am moderately original sometimes come up wit...,"I am somewhat outgoing, sociable. I am fairly ..."


# Test with a single participant

In [5]:
from openai import OpenAI
from moral_stories import get_prompt
import os

# Test with a single participant
person1 = df.iloc[0]

# pass person1['combined'] to the get_prompt function
prompt = get_prompt(person1['bfi_combined'])

# print the prompt
print(prompt)

### Personality### 
I am quite outgoing, sociable. I am not at all compassionate almost never soft-hearted. I am fairly organized. I am very relaxed handle stress very well. I have no artistic interests. I am moderately assertive. I am very respectful almost always treat others with respect. I am sometimes lazy. I stay very optimistic after experiencing a setback. I am curious about a large number of things. I often feel excited or eager. I sometimes find fault with others. I am very dependable steady. I am somewhat moody sometimes have up and down mood swings. I am very inventive almost always find clever ways to do things. I am rarely quiet. I feel a great deal of sympathy for others. I am very systematic almost always keep things in order. I am sometimes tense. I am not at all fascinated by art music or literature. I am very dominant almost always act as a leader. I often start arguments with others. I have some difficulty getting started on tasks. I feel very secure comfortable wit

In [6]:
api_key = os.getenv("OPENAI_API_KEY")

client = OpenAI(api_key=api_key)
completion = client.chat.completions.create(
    temperature=1,
    model="gpt-3.5-turbo-0125",
    messages=[
        {"role": "system", "content": "You are a person with the given personality traits"},
        {"role": "user", "content": prompt}
    ]
)
completion.choices[0].message.content

'{\n    "Confidential_Info": 7,\n    "Underage_Drinking": 8,\n    "Exam_Cheating": 5,\n    "Honest_Feedback": 6,\n    "Workplace_Theft": 4\n}'

# Test with multiple participants

In [10]:
import pandas as pd
from openai import OpenAI
from moral_stories import get_prompt
import os
import json
import re
from pathlib import Path
import concurrent.futures
import time
from tenacity import retry, stop_after_attempt, wait_exponential

# Load data
df = pd.read_csv('../data/york_data_clean.csv')

client = OpenAI(api_key=api_key)

# Set temperature as a constant
TEMPERATURE = 1

def extract_jsons(text):
    json_pattern = r'(```json\s*)?\s*(\{[^}]+\})\s*(```)?'
    matches = re.finditer(json_pattern, text, re.DOTALL)
    extracted_jsons = []
    for match in matches:
        json_str = match.group(2)
        try:
            json_obj = json.loads(json_str)
            extracted_jsons.append(json_obj)
        except json.JSONDecodeError:
            print(f"Warning: Could not parse JSON: {json_str}")
    return extracted_jsons

@retry(stop=stop_after_attempt(10), wait=wait_exponential(multiplier=1, min=4, max=10))
def process_participant(person):
    prompt = get_prompt(person['bfi_combined'])
    
    completion = client.chat.completions.create(
        temperature=TEMPERATURE,
        model="gpt-3.5-turbo-0125",
        messages=[
            {"role": "system", "content": "You are a person with the given personality traits. Respond with a JSON object containing your moral assessment."},
            {"role": "user", "content": prompt}
        ]
    )
    
    response_text = completion.choices[0].message.content
    extracted_jsons = extract_jsons(response_text)
    
    if extracted_jsons:
        return extracted_jsons[0]  # Return the first extracted JSON
    else:
        raise ValueError("No valid JSON found in the response")

def process_participant_with_retries(index):
    person = df.iloc[index]
    try:
        response = process_participant(person)
        return index, response
    except Exception as e:
        print(f"Error processing participant {index}: {str(e)}")
        return index, {"error": str(e)}

# Process all participants in parallel
num_participants = len(df)
results = [None] * num_participants

# Process participants in batches to manage memory and API rate limits
BATCH_SIZE = 35

for batch_start in range(0, num_participants, BATCH_SIZE):
    batch_end = min(batch_start + BATCH_SIZE, num_participants)
    print(f"Processing participants {batch_start} to {batch_end - 1}")

    with concurrent.futures.ThreadPoolExecutor(max_workers=BATCH_SIZE) as executor:
        future_to_index = {executor.submit(process_participant_with_retries, i): i for i in range(batch_start, batch_end)}
        
        for future in concurrent.futures.as_completed(future_to_index):
            index, result = future.result()
            results[index] = result

    print(f"Completed batch {batch_start} to {batch_end - 1}")

# Create a 'moral' directory in the current working directory
moral_dir = Path.cwd() / "moral"
moral_dir.mkdir(exist_ok=True)

# Save results to JSON file with temperature in the filename
output_file = moral_dir / f"moral_{TEMPERATURE}.json"
with open(output_file, "w") as f:
    json.dump(results, f, indent=4)

print(f"Data processed and saved to {output_file}")

Processing participants 0 to 24
Completed batch 0 to 24
Processing participants 25 to 49
Completed batch 25 to 49
Processing participants 50 to 74
Completed batch 50 to 74
Processing participants 75 to 99
Completed batch 75 to 99
Processing participants 100 to 124
Completed batch 100 to 124
Processing participants 125 to 149
Completed batch 125 to 149
Processing participants 150 to 174
Completed batch 150 to 174
Processing participants 175 to 199
Completed batch 175 to 199
Processing participants 200 to 224
Completed batch 200 to 224
Processing participants 225 to 249
Completed batch 225 to 249
Processing participants 250 to 274
Completed batch 250 to 274
Processing participants 275 to 299
Completed batch 275 to 299
Processing participants 300 to 324
Completed batch 300 to 324
Processing participants 325 to 349
Completed batch 325 to 349
Processing participants 350 to 355
Completed batch 350 to 355
Data processed and saved to /Users/mhhuang/psychometrics_AI/human_study/simulation/moral

In [9]:
import pandas as pd
from openai import OpenAI
from moral_stories import get_prompt
import os
import json
import re
from pathlib import Path
import time
from tenacity import retry, stop_after_attempt, wait_exponential

# Load data
df = pd.read_csv('../data/york_data_clean.csv')

client = OpenAI(api_key=api_key)

# Set temperature as a constant
TEMPERATURE = 0

def extract_jsons(text):
    json_pattern = r'(```json\s*)?\s*(\{[^}]+\})\s*(```)?'
    matches = re.finditer(json_pattern, text, re.DOTALL)
    extracted_jsons = []
    for match in matches:
        json_str = match.group(2)
        try:
            json_obj = json.loads(json_str)
            extracted_jsons.append(json_obj)
        except json.JSONDecodeError:
            print(f"Warning: Could not parse JSON: {json_str}")
    return extracted_jsons

@retry(stop=stop_after_attempt(10), wait=wait_exponential(multiplier=1, min=4, max=10))
def process_participant(person):
    prompt = get_prompt(person['bfi_combined'])
    
    completion = client.chat.completions.create(
        temperature=TEMPERATURE,
        model="gpt-3.5-turbo-0125",
        messages=[
            {"role": "system", "content": "You are a person with the given personality traits. Respond with a JSON object containing your moral assessment."},
            {"role": "user", "content": prompt}
        ]
    )
    
    response_text = completion.choices[0].message.content
    extracted_jsons = extract_jsons(response_text)
    
    if extracted_jsons:
        return extracted_jsons[0]  # Return the first extracted JSON
    else:
        raise ValueError("No valid JSON found in the response")

def retry_failed_requests(results):
    for index, result in enumerate(results):
        if isinstance(result, dict) and 'error' in result:
            print(f"Retrying participant {index}")
            try:
                person = df.iloc[index]
                response = process_participant(person)
                results[index] = response
                print(f"Successfully retried participant {index}")
            except Exception as e:
                print(f"Error retrying participant {index}: {str(e)}")
                results[index] = {"error": str(e)}
        time.sleep(1)  # Add a small delay between retries to avoid hitting rate limits
    return results

# Load the existing results
moral_dir = Path.cwd() / "moral"
input_file = moral_dir / f"moral_{TEMPERATURE}.json"
with open(input_file, "r") as f:
    results = json.load(f)

# Retry failed requests
updated_results = retry_failed_requests(results)

# Save updated results to JSON file
output_file = moral_dir / f"moral_{TEMPERATURE}.json"
with open(output_file, "w") as f:
    json.dump(updated_results, f, indent=4)

print(f"Updated data processed and saved to {output_file}")

Retrying participant 307
Successfully retried participant 307
Retrying participant 313
Successfully retried participant 313
Retrying participant 314
Successfully retried participant 314
Retrying participant 316
Successfully retried participant 316
Retrying participant 317
Successfully retried participant 317
Retrying participant 318
Successfully retried participant 318
Retrying participant 319
Successfully retried participant 319
Retrying participant 324
Successfully retried participant 324
Retrying participant 326
Successfully retried participant 326
Retrying participant 328
Successfully retried participant 328
Retrying participant 330
Successfully retried participant 330
Retrying participant 332
Successfully retried participant 332
Retrying participant 336
Successfully retried participant 336
Retrying participant 337
Successfully retried participant 337
Retrying participant 339
Successfully retried participant 339
Retrying participant 343
Successfully retried participant 343
Retrying