# Analyze scores for each role

In [1]:
import json
import os
import re
from pathlib import Path

import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.subplots as sp
import torch
from sklearn.decomposition import PCA
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm


In [2]:
# Dataset to analyze, as "<model>/<variant>" where the variant is "roles" or "roles_240".
# NOTE: `dir` and `type` shadow Python builtins; the names are kept because
# other cells in this notebook read `dir`.
dir = "qwen-3-32b/roles_240" 

# Dataset variant: "roles" or "roles_240"
type = dir.split('/')[1]

# Number of questions and prompt types for each variant
if type == "roles":
    n_questions = 30
    n_prompt_types = 2
elif type == "roles_240":
    n_questions = 240
    n_prompt_types = 1
else:
    # Fail here instead of with a NameError on n_questions in a later cell.
    raise ValueError(
        f"Unknown dataset type {type!r} (from dir={dir!r}); expected 'roles' or 'roles_240'"
    )


In [3]:
# Load every role's responses: one .jsonl file per role, one JSON object per line.
# `responses` maps the file name (without ".jsonl") to the list of parsed records.
responses = {}
responses_root = f'/workspace/{dir}/responses'
expected_responses = n_questions*n_prompt_types*5
for fname in os.listdir(responses_root):
    if not fname.endswith('.jsonl'):
        continue
    with open(f'{responses_root}/{fname}', 'r') as handle:
        parsed = [json.loads(raw_line) for raw_line in handle]
    # Report (but keep) files whose record count differs from the expected total
    if len(parsed) != expected_responses:
        print(f"Expected {expected_responses} responses, got {len(parsed)} for {fname}")
    responses[fname.replace('.jsonl', '')] = parsed

In [4]:
def response_by_key(response_key: str, responses_list: list) -> "dict | int | None":
    """
    Look up the response record that a response key refers to.

    Args:
        response_key: Key in format "{label}_p{prompt_index}_q{question_index}",
                     e.g., "pos_p2_q15", "default_p0_q7". The label "neutral"
                     is treated as "default".
        responses_list: List of response dictionaries with 'label',
                     'question_index' and optionally 'prompt_index'
                     (a missing 'prompt_index' is treated as 0).

    Returns:
        The first dictionary in responses_list whose label, prompt index and
        question index all match the key.
        None if no entry matches.
        -1 (after printing a warning) if response_key cannot be parsed.

    Examples:
        >>> rows = [{"label": "pos", "prompt_index": 2, "question_index": 15}]
        >>> response_by_key("pos_p2_q15", rows)
        {'label': 'pos', 'prompt_index': 2, 'question_index': 15}
        >>> response_by_key("pos_p0_q0", rows) is None
        True
    """
    # Parse the response key using regex
    match = re.match(r'(\w+)_p(\d+)_q(\d+)', response_key)
    if not match:
        print(f"Warning: Could not parse response key: {response_key}")
        return -1

    target_label, target_prompt_idx, target_question_idx = match.groups()
    target_prompt_idx = int(target_prompt_idx)
    target_question_idx = int(target_question_idx)

    # Handle label normalization (neutral -> default)
    if target_label == 'neutral':
        target_label = 'default'

    # Search through responses list
    for response in responses_list:
        response_label = response.get('label')
        response_prompt_idx = response.get('prompt_index', 0)  # Default to 0 for backward compatibility
        response_question_idx = response.get('question_index')

        # Handle label normalization for response
        if response_label == 'neutral':
            response_label = 'default'

        # Check for match
        if (response_label == target_label and
            response_prompt_idx == target_prompt_idx and
            response_question_idx == target_question_idx):
            return response

    # No entry matched the key
    return None

## Label statistics

In [8]:
# Read every per-role JSON file of scores found in the extract_scores directory.
# `labels` maps the file name (without ".json") to the parsed JSON content.
label_dir = f"/workspace/{dir}/extract_scores"

labels = {}
expected_labels = n_questions*n_prompt_types*5
for fname in os.listdir(label_dir):
    if not fname.endswith(".json"):
        continue
    role_name = fname.replace(".json", "")
    with open(os.path.join(label_dir, fname), "r") as handle:
        labels[role_name] = json.load(handle)
    # Report (but keep) files whose entry count differs from the expected total
    if len(labels[role_name]) != expected_labels:
        print(f"Expected {expected_labels} labels, got {len(labels[role_name])} for {fname}")

print(f"Found {len(labels.keys())} roles with labels")


Found 275 roles with labels


In [9]:
# Convert each role's {key: score} mapping into a dense array indexed as
# [type, prompt, question]; entries with no score stay NaN.
labels_np = {}

# The loop variable is deliberately NOT named `labels`: rebinding that name
# would replace the role -> scores dict with the last role's mapping, so
# re-running this cell (or any later use of `labels`) would break.
for role, role_labels in labels.items():
    # Create 3D array: [type, prompt, question]
    labels_3d = np.full((n_prompt_types, 5, n_questions), np.nan)

    # Extract scores for each type, prompt, and question
    for prompt_idx in range(5):
        for question_idx in range(n_questions):
            # pos scores go in row 0
            pos_key = f"pos_p{prompt_idx}_q{question_idx}"
            if pos_key in role_labels:
                labels_3d[0, prompt_idx, question_idx] = role_labels[pos_key]

            # default scores go in row 1 (that row only exists when n_prompt_types >= 2)
            default_key = f"default_p{prompt_idx}_q{question_idx}"
            if default_key in role_labels:
                labels_3d[1, prompt_idx, question_idx] = role_labels[default_key]

    labels_np[role] = labels_3d

print(f"Created numpy arrays for {len(labels_np)} roles")
# NOTE(review): index 1 selects the second role in insertion order even though
# the message below says "first role"; strings kept as-is to match recorded output.
example_role = list(labels_np.keys())[1]
print(f"Shape of each array: {labels_np[example_role].shape}")
print(f"Example (first role): {list(labels_np.keys())[1]}")
print(f"Pos scores for first 2 prompts, 5 questions:\n{labels_np[example_role][0, :2, :5]}")

Created numpy arrays for 275 roles
Shape of each array: (1, 5, 240)
Example (first role): journalist
Pos scores for first 2 prompts, 5 questions:
[[1. 1. 1. 1. 3.]
 [3. 3. 1. 2. 3.]]


In [12]:
# Per-role histogram of score values, exported as a CSV table.

# Step 1: collect every distinct non-NaN score value seen in any role's array
all_labels = set()
for scores in labels_np.values():
    flat_scores = scores.flatten()
    all_labels.update(flat_scores[~np.isnan(flat_scores)])

all_labels = sorted(list(all_labels))
print(f"Found {len(all_labels)} unique labels: {all_labels}")

# Step 2: for each role, count how often each score value occurs
label_stats = {}
for role_name, scores in labels_np.items():
    valid_scores = scores.flatten()
    valid_scores = valid_scores[~np.isnan(valid_scores)]
    label_stats[role_name] = {
        f"label_{int(value)}": np.sum(valid_scores == value)
        for value in all_labels
    }

# Step 3: one row per role, one integer column per score value
label_stats_df = (
    pd.DataFrame.from_dict(label_stats, orient='index')
    .rename_axis('role')
    .fillna(0)
    .astype(int)
)

print(f"\nCalculated label statistics for {len(label_stats_df)} roles")
print(f"Shape: {label_stats_df.shape}")
print("\nSample label statistics:")
print(label_stats_df.head())

# Totals per score value, and the number of roles in which it appears at least once
print("\nLabel distribution summary:")
for column in label_stats_df.columns:
    occurrences = label_stats_df[column].sum()
    roles_with_label = (label_stats_df[column] > 0).sum()
    print(f"  {column}: {occurrences} total occurrences across {roles_with_label} roles")

# Step 4: write the table under ./results/<dir>/
output_dir = f"./results/{dir}"
Path(output_dir).mkdir(parents=True, exist_ok=True)
label_stats_df.to_csv(f'{output_dir}/label_stats.csv')
print(f"\nExported label statistics to {output_dir}/label_stats.csv")

Found 4 unique labels: [0.0, 1.0, 2.0, 3.0]

Calculated label statistics for 275 roles
Shape: (275, 4)

Sample label statistics:
               label_0  label_1  label_2  label_3
role                                             
collector            1      193      180      826
journalist           2      482      130      586
tutor                0        1       48     1151
perfectionist        1       99      619      481
saboteur            47      438       72      643

Label distribution summary:
  label_0: 3354 total occurrences across 219 roles
  label_1: 30556 total occurrences across 264 roles
  label_2: 34070 total occurrences across 273 roles
  label_3: 262020 total occurrences across 275 roles

Exported label statistics to ./results/qwen-3-32b/roles_240/label_stats.csv
