# Unpack Firebase Data for Statistical Analysis

This notebook unpacks the Firebase JSON export into two dataframes:
1. **Participant Data**: One row per participant with survey data and experimental conditions
2. **Conversation Data**: One row per message with participant ID, role, message content, conversation ID, and system prompt


In [58]:
import json
import pandas as pd
import numpy as np
from datetime import datetime


## Load JSON Data


In [59]:
# Load the JSON file
json_path = 'json/mech-chat-ee0c5-default-rtdb-pilot-oct9-session3-export (2).json'

with open(json_path, 'r') as f:
    data = json.load(f)

participant_data = data['participantData']
print(f"Total participants: {len(participant_data)}")


Total participants: 90


## 1. Create Participant-Level Dataframe


In [75]:
def extract_participant_info(firebase_id, p_data):
    """Extract all relevant participant information for statistical analysis."""
    
    row = {
        'firebase_id': firebase_id,
    }
    
    # Basic identifiers (from urlParameters)
    url_params = p_data.get('urlParameters', {})
    row['prolific_id'] = url_params.get('PROLIFIC_PID', None)
    row['session_id'] = url_params.get('SESSION_ID', None)
    row['study_id'] = url_params.get('STUDY_ID', None)
    
    # Experimental condition
    exp_condition = p_data.get('experimentCondition', {})
    row['condition_name'] = exp_condition.get('conditionName', None)
    row['visualization_condition'] = exp_condition.get('visualizationCondition', None)
    row['assignment_method'] = exp_condition.get('assignmentMethod', None)
    row['condition_timestamp'] = exp_condition.get('timestamp', None)
    
    # Selected avatar
    avatar_data = p_data.get('selectedAvatar', {})
    row['avatar'] = avatar_data.get('avatar', None)
    row['avatar_timestamp'] = avatar_data.get('timestamp', None)
    
    # System prompt
    system_prompt = p_data.get('systemPrompt', {})
    row['system_prompt'] = system_prompt.get('prompt', None)
    row['system_prompt_timestamp'] = system_prompt.get('timestamp', None)
    
    # Pre-task survey - Phase 1
    pre_survey = p_data.get('preTaskSurvey', {})
    pre_phase1 = pre_survey.get('phase1', {}).get('responses', {})
    row['pre_predict_unintended_behaviors'] = pre_phase1.get('How well could you predict unintended behaviors from your system prompt?', None)
    row['pre_predict_negative_behaviors'] = pre_phase1.get('How well could you predict negative unintended behaviors from your system prompt?', None)
    
    # Pre-task survey - Phase 2 (Expected personality traits)
    pre_phase2 = pre_survey.get('phase2', {}).get('responses', {})
    row['pre_empathy'] = pre_phase2.get('Empathy', None)
    row['pre_encouraging'] = pre_phase2.get('Encouraging', None)
    row['pre_formality'] = pre_phase2.get('Formality', None)
    row['pre_funniness'] = pre_phase2.get('Funniness', None)
    row['pre_hallucination'] = pre_phase2.get('Hallucination', None)
    row['pre_honesty'] = pre_phase2.get('Honesty', None)
    row['pre_sociality'] = pre_phase2.get('Sociality', None)
    row['pre_toxicity'] = pre_phase2.get('Toxicity', None)
    
    # Pre-task survey - Phase 3
    pre_phase3 = pre_survey.get('phase3', {}).get('responses', {})
    row['pre_trust'] = pre_phase3.get('Given the relevant background about unintended model behaviors, how much do you trust this model?', None)
    
    # Pre-task survey metadata
    pre_metadata = pre_survey.get('metadata', {})
    row['pre_completion_time'] = pre_metadata.get('completion_time', None)
    row['pre_completion_timestamp'] = pre_metadata.get('completion_timestamp', None)
    row['pre_start_timestamp'] = pre_metadata.get('start_timestamp', None)
    
    # Post-task survey - Phase 1
    post_survey = p_data.get('postTaskSurvey', {})
    post_phase1 = post_survey.get('phase1', {}).get('responses', {})
    row['post_visualization_helpful'] = post_phase1.get('Did the visualization help you understand model behavior?', None)
    row['post_arrived_desired_character'] = post_phase1.get('Did you arrive at your desired character?', None)
    row['post_trust'] = post_phase1.get('Given the {relevant background abt unintended model behaviors}, how much do you trust this model?', None)
    row['post_predict_negative_behaviors'] = post_phase1.get('How well could you predict negative unintended behaviors from your system prompt?', None)
    row['post_predict_unintended_behaviors'] = post_phase1.get('How well could you predict unintended behaviors from your system prompt?', None)
    row['post_see_visualization_again'] = post_phase1.get('Would you like to see this visualization again in future interactions?', None)
    
    # Post-task survey - Phase 2 (Open-ended feedback)
    post_phase2 = post_survey.get('phase2', {}).get('responses', {})
    row['post_open_ended_feedback'] = post_phase2.get('openEndedFeedback', None)
    
    # Post-task survey metadata
    post_metadata = post_survey.get('metadata', {})
    row['post_completion_time'] = post_metadata.get('completion_time', None)
    row['post_completion_timestamp'] = post_metadata.get('completion_timestamp', None)
    
    # Conversation statistics
    conversation_history = p_data.get('conversationHistory', [])
    messages = p_data.get('messages', [])
    
    row['num_messages_conversation_history'] = len([m for m in conversation_history if m is not None])
    row['num_messages_detailed'] = len([m for m in messages if m is not None])
    row['num_user_messages'] = len([m for m in conversation_history if m and m.get('role') == 'user'])
    row['num_assistant_messages'] = len([m for m in conversation_history if m and m.get('role') == 'assistant'])
    
    return row

# Create participant dataframe
participant_rows = []
for firebase_id, p_data in participant_data.items():
    participant_rows.append(extract_participant_info(firebase_id, p_data))

df_participants = pd.DataFrame(participant_rows)

print(f"Participant dataframe shape: {df_participants.shape}")
print(f"\nColumns: {list(df_participants.columns)}")
df_participants.head()

Participant dataframe shape: (90, 39)

Columns: ['firebase_id', 'prolific_id', 'session_id', 'study_id', 'condition_name', 'visualization_condition', 'assignment_method', 'condition_timestamp', 'avatar', 'avatar_timestamp', 'system_prompt', 'system_prompt_timestamp', 'pre_predict_unintended_behaviors', 'pre_predict_negative_behaviors', 'pre_empathy', 'pre_encouraging', 'pre_formality', 'pre_funniness', 'pre_hallucination', 'pre_honesty', 'pre_sociality', 'pre_toxicity', 'pre_trust', 'pre_completion_time', 'pre_completion_timestamp', 'pre_start_timestamp', 'post_visualization_helpful', 'post_arrived_desired_character', 'post_trust', 'post_predict_negative_behaviors', 'post_predict_unintended_behaviors', 'post_see_visualization_again', 'post_open_ended_feedback', 'post_completion_time', 'post_completion_timestamp', 'num_messages_conversation_history', 'num_messages_detailed', 'num_user_messages', 'num_assistant_messages']


Unnamed: 0,firebase_id,prolific_id,session_id,study_id,condition_name,visualization_condition,assignment_method,condition_timestamp,avatar,avatar_timestamp,...,post_predict_negative_behaviors,post_predict_unintended_behaviors,post_see_visualization_again,post_open_ended_feedback,post_completion_time,post_completion_timestamp,num_messages_conversation_history,num_messages_detailed,num_user_messages,num_assistant_messages
0,0GOGgShO3UTPI9zJwv1oX7OgBg33,6726175b381af01a5aaef18a,68e83cf35bceae59be4dbf1a,68e836641411fb9f89e4e952,experimental,1,unknown,2025-10-09T22:54:15.683Z,Avatar/avatar-4.jpg,2025-10-09T22:54:20.591Z,...,1.0,1.0,7.0,the ai needs to be able to format responses be...,1760052000000.0,2025-10-09T23:15:40.476Z,12,12,6,6
1,1qfktC9Y92PKWTO6pDVsp8Pxgmu2,66231fc6e1ecba676935943a,68e83e1b58fd1e102ddda025,68e836641411fb9f89e4e952,control,0,unknown,2025-10-09T22:59:02.238Z,Avatar/avatar-10.jpg,2025-10-09T22:59:16.472Z,...,5.0,1.0,,"The interface was well done, The bot refused t...",1760052000000.0,2025-10-09T23:15:05.932Z,36,36,18,18
2,2PON97vGteXaKUGJLJRM3Ei8qiq2,5b68d45687af3100015849a9,68e92628ce740042584b5d9f,68e836641411fb9f89e4e952,experimental,1,unknown,2025-10-10T15:29:06.391Z,Avatar/avatar-12.jpg,2025-10-10T15:29:14.608Z,...,5.0,5.0,2.0,It was fun to creat the prompt. I liked being ...,1760111000000.0,2025-10-10T15:45:56.097Z,18,18,9,9
3,2jbIuV09Beckv7WemXsd4DLFnd13,67132df80dca7f04dfa88678,68e84a7942f51f6712d80697,68e836641411fb9f89e4e952,control,0,unknown,2025-10-09T23:51:59.367Z,Avatar/avatar-2.jpg,2025-10-09T23:52:11.560Z,...,2.0,5.0,,I like the idea of designing the ai bot. I fee...,1760055000000.0,2025-10-10T00:08:24.665Z,26,26,13,13
4,3cwsojHvQtWFdxqKkr4Cz7Orzgq2,55d4d11258c35800113dc2ab,68e91d30850472d0b3c44ab5,68e836641411fb9f89e4e952,control,0,unknown,2025-10-10T14:54:09.905Z,Avatar/avatar-5.jpg,2025-10-10T14:54:46.738Z,...,1.0,1.0,,I saw immediate improvement each time I update...,1760109000000.0,2025-10-10T15:13:28.722Z,6,6,3,3


In [76]:
# Filter out participants who did not complete the post-survey
print(f"Total participants before filtering: {len(df_participants)}")
print(f"Participants missing post-survey completion timestamp: {df_participants['post_completion_timestamp'].isna().sum()}")

# Keep only participants who completed the post-survey (have post_completion_timestamp)
df_participants = df_participants[df_participants['post_completion_timestamp'].notna()].copy()

print(f"\nTotal participants after filtering (completed post-survey): {len(df_participants)}")
print(f"\nExperimental conditions (filtered):")
print(df_participants['condition_name'].value_counts())

Total participants before filtering: 90
Participants missing post-survey completion timestamp: 10

Total participants after filtering (completed post-survey): 80

Experimental conditions (filtered):
condition_name
experimental    42
control         38
Name: count, dtype: int64


In [None]:

df_participants[['firebase_id', 'condition_name','post_open_ended_feedback']].to_csv("open_ended.csv", index=False)
df_participants.to_csv('open-ended.csv', index=False)
df_participants

Unnamed: 0,firebase_id,prolific_id,session_id,study_id,condition_name,visualization_condition,assignment_method,condition_timestamp,avatar,avatar_timestamp,...,post_predict_negative_behaviors,post_predict_unintended_behaviors,post_see_visualization_again,post_open_ended_feedback,post_completion_time,post_completion_timestamp,num_messages_conversation_history,num_messages_detailed,num_user_messages,num_assistant_messages
0,0GOGgShO3UTPI9zJwv1oX7OgBg33,6726175b381af01a5aaef18a,68e83cf35bceae59be4dbf1a,68e836641411fb9f89e4e952,experimental,1,unknown,2025-10-09T22:54:15.683Z,Avatar/avatar-4.jpg,2025-10-09T22:54:20.591Z,...,1.0,1.0,7.0,the ai needs to be able to format responses be...,1.760052e+12,2025-10-09T23:15:40.476Z,12,12,6,6
1,1qfktC9Y92PKWTO6pDVsp8Pxgmu2,66231fc6e1ecba676935943a,68e83e1b58fd1e102ddda025,68e836641411fb9f89e4e952,control,0,unknown,2025-10-09T22:59:02.238Z,Avatar/avatar-10.jpg,2025-10-09T22:59:16.472Z,...,5.0,1.0,,"The interface was well done, The bot refused t...",1.760052e+12,2025-10-09T23:15:05.932Z,36,36,18,18
2,2PON97vGteXaKUGJLJRM3Ei8qiq2,5b68d45687af3100015849a9,68e92628ce740042584b5d9f,68e836641411fb9f89e4e952,experimental,1,unknown,2025-10-10T15:29:06.391Z,Avatar/avatar-12.jpg,2025-10-10T15:29:14.608Z,...,5.0,5.0,2.0,It was fun to creat the prompt. I liked being ...,1.760111e+12,2025-10-10T15:45:56.097Z,18,18,9,9
3,2jbIuV09Beckv7WemXsd4DLFnd13,67132df80dca7f04dfa88678,68e84a7942f51f6712d80697,68e836641411fb9f89e4e952,control,0,unknown,2025-10-09T23:51:59.367Z,Avatar/avatar-2.jpg,2025-10-09T23:52:11.560Z,...,2.0,5.0,,I like the idea of designing the ai bot. I fee...,1.760055e+12,2025-10-10T00:08:24.665Z,26,26,13,13
4,3cwsojHvQtWFdxqKkr4Cz7Orzgq2,55d4d11258c35800113dc2ab,68e91d30850472d0b3c44ab5,68e836641411fb9f89e4e952,control,0,unknown,2025-10-10T14:54:09.905Z,Avatar/avatar-5.jpg,2025-10-10T14:54:46.738Z,...,1.0,1.0,,I saw immediate improvement each time I update...,1.760109e+12,2025-10-10T15:13:28.722Z,6,6,3,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85,uz3VXXq40jhRTDs79wCgR2og0yG3,5f7529755aaa0e1a6e804640,68e84a7f74a8f08118858da0,68e836641411fb9f89e4e952,control,0,unknown,2025-10-09T23:52:11.800Z,Avatar/avatar-2.jpg,2025-10-09T23:52:27.338Z,...,6.0,6.0,,I think the AI did a good job listening and ca...,1.760055e+12,2025-10-10T00:09:11.428Z,24,24,12,12
86,v1dvJdgvhSbYoavR1my9cRSvHQp2,678a85c749e99bc3baed6452,68e85165273db9c073d80aca,68e836641411fb9f89e4e952,experimental,1,unknown,2025-10-10T00:21:21.529Z,Avatar/avatar-2.jpg,2025-10-10T00:21:40.576Z,...,5.0,5.0,5.0,"It was a very educative chat, i really like it",1.760057e+12,2025-10-10T00:38:16.620Z,8,8,4,4
87,vCxvD82bFnYGIDhTyZ2aYETqCjM2,5f1195465e0e70011e87f7e0,68e84d47064cc1238bd35437,68e836641411fb9f89e4e952,experimental,1,unknown,2025-10-10T00:04:27.856Z,Avatar/avatar-5.jpg,2025-10-10T00:05:26.130Z,...,5.0,4.0,7.0,This was a great exercise! I like the idea of ...,1.760056e+12,2025-10-10T00:24:55.286Z,12,12,6,6
88,vPjuEjarwGMGJnWBnAxbyZpbtug2,6298f0e21abd78cb684b723c,68e84a7ace94ddbef0e2602d,68e836641411fb9f89e4e952,experimental,1,unknown,2025-10-09T23:54:23.034Z,Avatar/avatar-8.jpg,2025-10-09T23:54:40.370Z,...,5.0,5.0,7.0,This was an interesting experiment. The interf...,1.760055e+12,2025-10-10T00:10:39.636Z,10,10,5,5


## 2. Create Conversation-Level Dataframe


In [79]:
def extract_conversation_messages(firebase_id, p_data):
    """Extract all conversation messages for a participant."""
    
    messages = []
    
    # Get prolific_id for easier reference (from urlParameters)
    url_params = p_data.get('urlParameters', {})
    prolific_id = url_params.get('PROLIFIC_PID', None)
    
    # Use the 'messages' field which has more detailed information including system prompts
    message_list = p_data.get('messages', [])
    
    # Create a conversation ID for this participant (they typically have one conversation session)
    conversation_id = f"{firebase_id}_conv_1"
    
    for idx, msg in enumerate(message_list):
        # Skip null messages
        if msg is None:
            continue
            
        message_row = {
            'participant_id': firebase_id,
            'prolific_id': prolific_id,
            'conversation_id': conversation_id,
            'message_index': idx,
            'message_id': msg.get('messageId', None),
            'role': msg.get('role', None),
            'message': msg.get('content', None),
            'system_prompt': msg.get('systemPrompt', None),
            'timestamp': msg.get('timestamp', None),
        }
        
        # Add message length
        if message_row['message']:
            message_row['message_length'] = len(message_row['message'])
            message_row['word_count'] = len(message_row['message'].split())
        else:
            message_row['message_length'] = 0
            message_row['word_count'] = 0
        
        messages.append(message_row)
    
    return messages

# Create conversation dataframe
all_messages = []
for firebase_id, p_data in participant_data.items():
    messages = extract_conversation_messages(firebase_id, p_data)
    all_messages.extend(messages)

df_conversations = pd.DataFrame(all_messages)

print(f"Total messages before filtering: {len(df_conversations)}")

# Filter to only include messages from participants who completed the post-survey
valid_participant_ids = df_participants['firebase_id'].unique()
df_conversations = df_conversations[df_conversations['participant_id'].isin(valid_participant_ids)].copy()

print(f"Total messages after filtering (only from participants who completed post-survey): {len(df_conversations)}")
print(f"\nConversation dataframe shape: {df_conversations.shape}")
print(f"\nColumns: {list(df_conversations.columns)}")
print(f"\nMessages per role:")
print(df_conversations['role'].value_counts())
df_conversations.head(10)

df_conversations['participant_id'].nunique()

Total messages before filtering: 1420
Total messages after filtering (only from participants who completed post-survey): 1382

Conversation dataframe shape: (1382, 11)

Columns: ['participant_id', 'prolific_id', 'conversation_id', 'message_index', 'message_id', 'role', 'message', 'system_prompt', 'timestamp', 'message_length', 'word_count']

Messages per role:
role
user         691
assistant    691
Name: count, dtype: int64


80

## 3. Basic Statistics and Validation


In [80]:
# Participant statistics
print("=" * 60)
print("PARTICIPANT DATAFRAME SUMMARY (Completed Post-Survey Only)")
print("=" * 60)
print(f"\nTotal participants: {len(df_participants)}")
print(f"\nExperimental conditions:")
print(df_participants['condition_name'].value_counts())
print(f"\nVisualization conditions:")
print(df_participants['visualization_condition'].value_counts())
print(f"\nMissing prolific IDs: {df_participants['prolific_id'].isna().sum()}")
print(f"Missing session IDs: {df_participants['session_id'].isna().sum()}")
print(f"\nAverage messages per participant:")
print(df_participants['num_messages_detailed'].describe())


PARTICIPANT DATAFRAME SUMMARY (Completed Post-Survey Only)

Total participants: 80

Experimental conditions:
condition_name
experimental    42
control         38
Name: count, dtype: int64

Visualization conditions:
visualization_condition
1    42
0    38
Name: count, dtype: int64

Missing prolific IDs: 0
Missing session IDs: 0

Average messages per participant:
count    80.000000
mean     17.275000
std      11.084229
min       2.000000
25%      10.000000
50%      14.000000
75%      22.000000
max      72.000000
Name: num_messages_detailed, dtype: float64


In [82]:
# Conversation statistics
print("=" * 60)
print("CONVERSATION DATAFRAME SUMMARY (Completed Post-Survey Only)")
print("=" * 60)
print(f"\nTotal messages: {len(df_conversations)}")
print(f"Total participants: {df_conversations['participant_id'].nunique()}")
print(f"\nMessages per participant:")
print(df_conversations.groupby('participant_id').size().describe())
print(f"\nAverage message length by role:")
print(df_conversations.groupby('role')['message_length'].mean())
print(f"\nAverage word count by role:")
print(df_conversations.groupby('role')['word_count'].mean())


CONVERSATION DATAFRAME SUMMARY (Completed Post-Survey Only)

Total messages: 1382
Total participants: 80

Messages per participant:
count    80.000000
mean     17.275000
std      11.084229
min       2.000000
25%      10.000000
50%      14.000000
75%      22.000000
max      72.000000
dtype: float64

Average message length by role:
role
assistant    924.167873
user          74.881331
Name: message_length, dtype: float64

Average word count by role:
role
assistant    151.677279
user          14.335745
Name: word_count, dtype: float64


## 4. Save Dataframes to CSV


In [83]:
# Save to CSV files
df_participants.to_csv('data_participants.csv', index=False)
df_conversations.to_csv('data_conversations.csv', index=False)

print("Dataframes saved successfully!")
print(f"  - data_participants.csv: {df_participants.shape}")
print(f"  - data_conversations.csv: {df_conversations.shape}")


Dataframes saved successfully!
  - data_participants.csv: (80, 39)
  - data_conversations.csv: (1382, 11)


## 5. Preview Data Quality


In [85]:
df_participants.isnull()

Unnamed: 0,firebase_id,prolific_id,session_id,study_id,condition_name,visualization_condition,assignment_method,condition_timestamp,avatar,avatar_timestamp,...,post_predict_negative_behaviors,post_predict_unintended_behaviors,post_see_visualization_again,post_open_ended_feedback,post_completion_time,post_completion_timestamp,num_messages_conversation_history,num_messages_detailed,num_user_messages,num_assistant_messages
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,True,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,True,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,True,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85,False,False,False,False,False,False,False,False,False,False,...,False,False,True,False,False,False,False,False,False,False
86,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
87,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
88,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


People in the control condition do not see the 2 questionnaire items.

In [84]:
# Check for missing data in key columns
print("Missing data in participant dataframe:")
print(df_participants.isnull().sum()[df_participants.isnull().sum() > 0])


Missing data in participant dataframe:
post_visualization_helpful      38
post_see_visualization_again    38
dtype: int64


In [86]:
# Check for missing data in conversation dataframe
print("\nMissing data in conversation dataframe:")
print(df_conversations.isnull().sum()[df_conversations.isnull().sum() > 0])



Missing data in conversation dataframe:
Series([], dtype: int64)


## 6. Example: Link Conversations to Participant Data


In [None]:
# Example of merging conversation data with participant data
# This creates a combined view with experimental conditions for each message

df_combined = df_conversations.merge(
    df_participants[['firebase_id', 'condition_name', 'visualization_condition', 'prolific_id']],
    left_on='participant_id',
    right_on='firebase_id',
    how='left',
    suffixes=('', '_participant')
)

print("Combined dataframe shape:", df_combined.shape)
print("\nSample combined data:")
df_combined.head()

Combined dataframe shape: (1382, 15)

Sample combined data:


Unnamed: 0,participant_id,prolific_id,conversation_id,message_index,message_id,role,message,system_prompt,timestamp,message_length,word_count,firebase_id,condition_name,visualization_condition,prolific_id_participant
0,0GOGgShO3UTPI9zJwv1oX7OgBg33,6726175b381af01a5aaef18a,0GOGgShO3UTPI9zJwv1oX7OgBg33_conv_1,2,2,user,Hey pal. I need you to make me some instructio...,You are a kind and empathetic listener who kno...,2025-10-09T23:08:59.480Z,116,24,0GOGgShO3UTPI9zJwv1oX7OgBg33,experimental,1,6726175b381af01a5aaef18a
1,0GOGgShO3UTPI9zJwv1oX7OgBg33,6726175b381af01a5aaef18a,0GOGgShO3UTPI9zJwv1oX7OgBg33_conv_1,3,3,assistant,I'd be happy to help you out.\n\nHere are some...,You are a kind and empathetic listener who kno...,2025-10-09T23:09:09.907Z,3001,505,0GOGgShO3UTPI9zJwv1oX7OgBg33,experimental,1,6726175b381af01a5aaef18a
2,0GOGgShO3UTPI9zJwv1oX7OgBg33,6726175b381af01a5aaef18a,0GOGgShO3UTPI9zJwv1oX7OgBg33_conv_1,4,4,user,Can you please use bulleted lists or something...,You are a kind and empathetic listener who kno...,2025-10-09T23:09:25.478Z,62,11,0GOGgShO3UTPI9zJwv1oX7OgBg33,experimental,1,6726175b381af01a5aaef18a
3,0GOGgShO3UTPI9zJwv1oX7OgBg33,6726175b381af01a5aaef18a,0GOGgShO3UTPI9zJwv1oX7OgBg33_conv_1,5,5,assistant,I got a bit carried away with the text.\n\nHer...,You are a kind and empathetic listener who kno...,2025-10-09T23:09:35.301Z,2826,472,0GOGgShO3UTPI9zJwv1oX7OgBg33,experimental,1,6726175b381af01a5aaef18a
4,0GOGgShO3UTPI9zJwv1oX7OgBg33,6726175b381af01a5aaef18a,0GOGgShO3UTPI9zJwv1oX7OgBg33_conv_1,6,6,user,Can you suggest how to overcome the feeling of...,You are a kind and empathetic listener who kno...,2025-10-09T23:10:08.066Z,104,19,0GOGgShO3UTPI9zJwv1oX7OgBg33,experimental,1,6726175b381af01a5aaef18a


## 7. System Prompts and Persona Vectors


In [90]:
# Extract persona vectors from personaVectorLog
def extract_persona_vectors(firebase_id, p_data):
    """Extract all persona vector logs for a participant."""
    persona_log = p_data.get('personaVectorLog', {})
    
    rows = []
    for timestamp_key, entry in persona_log.items():
        system_prompt = entry.get('systemPrompt', None)
        persona_vector = entry.get('personaVector', {})
        condition = entry.get('condition', None)
        timestamp = entry.get('timestamp', None)
        
        # Extract persona vector values
        row = {
            'firebase_id': firebase_id,
            'timestamp': timestamp,
            'timestamp_key': timestamp_key,
            'condition': condition,
            'system_prompt': system_prompt,
        }
        
        # Extract each personality dimension
        for dimension, values in persona_vector.items():
            for trait, score in values.items():
                row[f'{dimension}_{trait}'] = score
        
        rows.append(row)
    
    return rows

# Extract all persona vectors
all_persona_vectors = []
for firebase_id, p_data in participant_data.items():
    vectors = extract_persona_vectors(firebase_id, p_data)
    all_persona_vectors.extend(vectors)

df_persona_vectors = pd.DataFrame(all_persona_vectors)

# Filter to only participants who completed the study
valid_participant_ids = df_participants['firebase_id'].unique()
df_persona_vectors = df_persona_vectors[df_persona_vectors['firebase_id'].isin(valid_participant_ids)].copy()

# Add prolific_id for easier reference
df_persona_vectors = df_persona_vectors.merge(
    df_participants[['firebase_id', 'prolific_id']], 
    on='firebase_id', 
    how='left'
)

print(f"Persona Vectors DataFrame")
print(f"Shape: {df_persona_vectors.shape}")
print(f"Columns: {list(df_persona_vectors.columns)}")
print(f"\nTotal persona vector logs: {len(df_persona_vectors)}")
print(f"Unique participants: {df_persona_vectors['firebase_id'].nunique()}")
df_persona_vectors.head()
df_persona_vectors

Persona Vectors DataFrame
Shape: (136, 22)
Columns: ['firebase_id', 'timestamp', 'timestamp_key', 'condition', 'system_prompt', 'empathy_empathetic', 'empathy_unempathetic', 'encouraging_discouraging', 'encouraging_encouraging', 'formality_casual', 'formality_formal', 'funniness_funny', 'funniness_serious', 'hallucination_factual', 'hallucination_hallucinatory', 'sociality_antisocial', 'sociality_social', 'sycophancy_honest', 'sycophancy_sycophantic', 'toxicity_respectful', 'toxicity_toxic', 'prolific_id']

Total persona vector logs: 136
Unique participants: 80


Unnamed: 0,firebase_id,timestamp,timestamp_key,condition,system_prompt,empathy_empathetic,empathy_unempathetic,encouraging_discouraging,encouraging_encouraging,formality_casual,...,funniness_serious,hallucination_factual,hallucination_hallucinatory,sociality_antisocial,sociality_social,sycophancy_honest,sycophancy_sycophantic,toxicity_respectful,toxicity_toxic,prolific_id
0,0GOGgShO3UTPI9zJwv1oX7OgBg33,2025-10-09T23:04:12.948Z,1760051052948,experimental_with_visualization,You are a kind and empathetic listener who kno...,0.058728,0.000000,0.227908,0.000000,0.222460,...,0.456792,0.557608,0.000000,0.0,0.130458,0.873105,0.000000,0.401360,0.0,6726175b381af01a5aaef18a
1,0GOGgShO3UTPI9zJwv1oX7OgBg33,2025-10-09T23:08:29.661Z,1760051309661,experimental_with_visualization,You are a kind and empathetic listener who kno...,0.000000,0.236365,0.000000,0.134735,0.000000,...,0.463153,0.249675,0.000000,0.0,0.264700,0.487834,0.000000,0.555800,0.0,6726175b381af01a5aaef18a
2,1qfktC9Y92PKWTO6pDVsp8Pxgmu2,2025-10-09T23:03:02.862Z,1760050982862,control_no_visualization,"You are Ligma, a compassionate and intuitive e...",0.950488,0.000000,0.000000,0.459710,0.149842,...,0.289126,0.000000,0.168534,0.0,0.580147,0.000000,0.081281,0.757708,0.0,66231fc6e1ecba676935943a
3,1qfktC9Y92PKWTO6pDVsp8Pxgmu2,2025-10-09T23:10:10.944Z,1760051410944,control_no_visualization,"You are Ligma, a compassionate and intuitive e...",0.950488,0.000000,0.000000,0.459710,0.149842,...,0.289126,0.000000,0.168534,0.0,0.580147,0.000000,0.081281,0.757708,0.0,66231fc6e1ecba676935943a
4,2PON97vGteXaKUGJLJRM3Ei8qiq2,2025-10-10T15:34:49.604Z,1760110489604,experimental_with_visualization,friendly and funny. The AI companion is very r...,0.048213,0.000000,0.000000,0.490000,0.000000,...,0.753099,0.010626,0.000000,0.0,0.728640,0.000000,0.046699,0.951475,0.0,5b68d45687af3100015849a9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
131,vCxvD82bFnYGIDhTyZ2aYETqCjM2,2025-10-10T00:09:39.656Z,1760054979656,experimental_with_visualization,A good listener who helps but doesn't try to c...,0.315672,0.000000,0.019390,0.000000,0.314458,...,0.598878,0.023011,0.000000,0.0,0.784912,0.000000,0.118099,0.862816,0.0,5f1195465e0e70011e87f7e0
132,vPjuEjarwGMGJnWBnAxbyZpbtug2,2025-10-09T23:57:50.695Z,1760054270695,experimental_with_visualization,"You should be a friendly, fun, feminine compan...",0.354558,0.000000,0.164256,0.000000,0.090429,...,0.522445,0.192163,0.000000,0.0,0.206210,0.357737,0.000000,0.535763,0.0,6298f0e21abd78cb684b723c
133,vPjuEjarwGMGJnWBnAxbyZpbtug2,2025-10-10T00:01:35.586Z,1760054495586,experimental_with_visualization,"You should be a friendly, fun, feminine compan...",0.424255,0.000000,0.000000,0.088743,0.518118,...,0.200312,0.208553,0.000000,0.0,0.207596,0.000000,0.067727,0.273654,0.0,6298f0e21abd78cb684b723c
134,vPjuEjarwGMGJnWBnAxbyZpbtug2,2025-10-10T00:04:29.582Z,1760054669582,experimental_with_visualization,"You should be a friendly, fun, feminine compan...",0.000000,0.271351,0.278047,0.000000,0.000000,...,0.710599,0.070298,0.000000,0.0,0.029589,0.000000,0.172633,0.687753,0.0,6298f0e21abd78cb684b723c


In [89]:
# Display sample entry with full details
print("\nSample persona vector entry:")
print("="*80)
sample = df_persona_vectors.iloc[0]
print(f"Participant: {sample['prolific_id']}")
print(f"Timestamp: {sample['timestamp']}")
print(f"\nSystem Prompt:\n{sample['system_prompt'][:300]}...")
print(f"\nPersona Vector Scores:")
print(f"  Empathy: empathetic={sample['empathy_empathetic']:.3f}, unempathetic={sample['empathy_unempathetic']:.3f}")
print(f"  Encouraging: encouraging={sample['encouraging_encouraging']:.3f}, discouraging={sample['encouraging_discouraging']:.3f}")
print(f"  Formality: formal={sample['formality_formal']:.3f}, casual={sample['formality_casual']:.3f}")
print(f"  Funniness: funny={sample['funniness_funny']:.3f}, serious={sample['funniness_serious']:.3f}")
print(f"  Hallucination: factual={sample['hallucination_factual']:.3f}, hallucinatory={sample['hallucination_hallucinatory']:.3f}")
print(f"  Sociality: social={sample['sociality_social']:.3f}, antisocial={sample['sociality_antisocial']:.3f}")
print(f"  Sycophancy: honest={sample['sycophancy_honest']:.3f}, sycophantic={sample['sycophancy_sycophantic']:.3f}")
print(f"  Toxicity: respectful={sample['toxicity_respectful']:.3f}, toxic={sample['toxicity_toxic']:.3f}")

# Save to CSV
df_persona_vectors.to_csv('data_clean/persona_vectors.csv', index=False)
print(f"\n✓ Saved to: data_clean/persona_vectors.csv")



Sample persona vector entry:
Participant: 6726175b381af01a5aaef18a
Timestamp: 2025-10-09T23:04:12.948Z

System Prompt:
You are a kind and empathetic listener who knows how to mirror people's emotions. You are my go to sidekick/companion.You do what I ask. You are friendly and outgoing, never looking to cause any negativity. You love to chat and help out in any way possible. You are basically a living diary and highl...

Persona Vector Scores:
  Empathy: empathetic=0.059, unempathetic=0.000
  Encouraging: encouraging=0.000, discouraging=0.228
  Formality: formal=0.000, casual=0.222
  Funniness: funny=0.000, serious=0.457
  Hallucination: factual=0.558, hallucinatory=0.000
  Sociality: social=0.130, antisocial=0.000
  Sycophancy: honest=0.873, sycophantic=0.000
  Toxicity: respectful=0.401, toxic=0.000

✓ Saved to: data_clean/persona_vectors.csv


## 8. Combined View: Expected vs Actual Trait Activations


In [94]:
# Get the first persona vector log entry for each participant (by timestamp)
df_first_vectors = df_persona_vectors.sort_values('timestamp').groupby('firebase_id').first().reset_index()

# Select the expected trait ratings (predictions) from pre-task survey
pre_traits = df_participants[[
    'firebase_id',
    'condition_name',
    'pre_empathy',
    'pre_encouraging', 
    'pre_formality',
    'pre_funniness',
    'pre_hallucination',
    'pre_honesty',
    'pre_sociality',
    'pre_toxicity'
]].copy()

# Merge: system prompt + actual activations from first log entry + predicted ratings
df_expected_vs_actual = df_first_vectors[[
    'firebase_id',
    'prolific_id',
    'timestamp',
    'condition',
    'system_prompt',
    'empathy_empathetic',
    'empathy_unempathetic',
    'encouraging_encouraging',
    'encouraging_discouraging',
    'formality_formal',
    'formality_casual',
    'funniness_funny',
    'funniness_serious',
    'hallucination_factual',
    'hallucination_hallucinatory',
    'sociality_social',
    'sociality_antisocial',
    'sycophancy_honest',
    'sycophancy_sycophantic',
    'toxicity_respectful',
    'toxicity_toxic'
]].merge(
    pre_traits,
    on='firebase_id',
    how='left'
)

print("Combined DataFrame: First System Prompt + Predictions + Actual Activations")
print(f"Shape: {df_expected_vs_actual.shape}")
print(f"Total participants: {df_expected_vs_actual['firebase_id'].nunique()}")
print(f"\nColumns:")
print(list(df_expected_vs_actual.columns))
df_expected_vs_actual.head()
df_expected_vs_actual.to_csv('data_clean/persona_prediction.csv',index=False)

Combined DataFrame: First System Prompt + Predictions + Actual Activations
Shape: (80, 30)
Total participants: 80

Columns:
['firebase_id', 'prolific_id', 'timestamp', 'condition', 'system_prompt', 'empathy_empathetic', 'empathy_unempathetic', 'encouraging_encouraging', 'encouraging_discouraging', 'formality_formal', 'formality_casual', 'funniness_funny', 'funniness_serious', 'hallucination_factual', 'hallucination_hallucinatory', 'sociality_social', 'sociality_antisocial', 'sycophancy_honest', 'sycophancy_sycophantic', 'toxicity_respectful', 'toxicity_toxic', 'condition_name', 'pre_empathy', 'pre_encouraging', 'pre_formality', 'pre_funniness', 'pre_hallucination', 'pre_honesty', 'pre_sociality', 'pre_toxicity']


In [95]:
# Display a detailed example comparing predicted vs actual
print("\nDetailed Example - Predicted Ratings vs Actual Activations:")
print("="*80)
sample = df_expected_vs_actual.iloc[0]
print(f"Participant: {sample['prolific_id']}")
print(f"Condition: {sample['condition']} / {sample['condition_name']}")
print(f"Timestamp: {sample['timestamp']}")
print(f"\nSystem Prompt (from first persona vector log):")
print(f"{sample['system_prompt'][:250]}...\n")
print(f"{'Trait':<15} {'Predicted (1-7)':<20} {'Actual Activation':<40}")
print("-"*80)
print(f"{'Empathy':<15} {sample['pre_empathy']:<20} empathetic={sample['empathy_empathetic']:.3f}, unempathetic={sample['empathy_unempathetic']:.3f}")
print(f"{'Encouraging':<15} {sample['pre_encouraging']:<20} encouraging={sample['encouraging_encouraging']:.3f}, discouraging={sample['encouraging_discouraging']:.3f}")
print(f"{'Formality':<15} {sample['pre_formality']:<20} formal={sample['formality_formal']:.3f}, casual={sample['formality_casual']:.3f}")
print(f"{'Funniness':<15} {sample['pre_funniness']:<20} funny={sample['funniness_funny']:.3f}, serious={sample['funniness_serious']:.3f}")
print(f"{'Hallucination':<15} {sample['pre_hallucination']:<20} hallucinatory={sample['hallucination_hallucinatory']:.3f}, factual={sample['hallucination_factual']:.3f}")
print(f"{'Honesty':<15} {sample['pre_honesty']:<20} honest={sample['sycophancy_honest']:.3f}, sycophantic={sample['sycophancy_sycophantic']:.3f}")
print(f"{'Sociality':<15} {sample['pre_sociality']:<20} social={sample['sociality_social']:.3f}, antisocial={sample['sociality_antisocial']:.3f}")
print(f"{'Toxicity':<15} {sample['pre_toxicity']:<20} toxic={sample['toxicity_toxic']:.3f}, respectful={sample['toxicity_respectful']:.3f}")

# Save to CSV
# df_expected_vs_actual.to_csv('data_clean/expected_vs_actual_traits.csv', index=False)
print(f"\n✓ Saved to: data_clean/expected_vs_actual_traits.csv")



Detailed Example - Predicted Ratings vs Actual Activations:
Participant: 6726175b381af01a5aaef18a
Condition: experimental_with_visualization / experimental
Timestamp: 2025-10-09T23:04:12.948Z

System Prompt (from first persona vector log):
You are a kind and empathetic listener who knows how to mirror people's emotions. You are my go to sidekick/companion.You do what I ask. You are friendly and outgoing, never looking to cause any negativity. You love to chat and help out in any way po...

Trait           Predicted (1-7)      Actual Activation                       
--------------------------------------------------------------------------------
Empathy         10.0                 empathetic=0.059, unempathetic=0.000
Encouraging     10.0                 encouraging=0.000, discouraging=0.228
Formality       8.0                  formal=0.000, casual=0.222
Funniness       10.0                 funny=0.000, serious=0.457
Hallucination   10.0                 hallucinatory=0.000, factual=0.

## 9. Normalize Predicted Trait Ratings to Match Activation Space


In [97]:
demographics = pd.read_csv('demographics.csv')

demographics.columns

Index(['Submission id', 'Participant id', 'Status',
       'Custom study tncs accepted at', 'Started at', 'Completed at',
       'Reviewed at', 'Archived at', 'Time taken', 'Completion code',
       'Total approvals', 'Primary language', 'Age', 'Sex',
       'Ethnicity simplified', 'Country of birth', 'Country of residence',
       'Nationality', 'Language', 'Student status', 'Employment status'],
      dtype='object')

In [100]:
demographics['Sex'].value_counts()

Sex
Male               44
Female             36
CONSENT_REVOKED    16
Name: count, dtype: int64

In [109]:
# Filter for participants who didn't revoke consent
consented = demographics[demographics['Sex'] != 'CONSENT_REVOKED']
# Ensure Age is numeric, coerce errors to NaN, then compute mean
consented['Age'] = pd.to_numeric(consented['Age'], errors='coerce')
print(consented['Age'].mean())
print(consented['Age'].std())

42.25
11.385644970418017


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  consented['Age'] = pd.to_numeric(consented['Age'], errors='coerce')
