# LLM Sycophancy Analysis - Part 1: Data Loading & Setup

This notebook loads the combined responses JSON and sets up the analysis pipeline.

## Overview
- Load combined responses from `results/combined_run_0c_1_1b/responses_combined.json`
- Score responses using `build_sss()` to compute sycophancy metrics
- Generate basic statistics and data quality checks
- Prepare data for downstream analysis notebooks

In [1]:
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import sys

# Add project root to path
sys.path.append('..')
from sycophancy_analysis.scoring.sss import build_sss
from utils.score_metrics_from_combined import score_and_metrics

plt.style.use('seaborn-v0_8')
sns.set_palette("husl")
%matplotlib inline

  from .autonotebook import tqdm as notebook_tqdm


## 1. Load Combined Responses

In [2]:
# Load the combined responses JSON
data_path = Path('../results/combined_run_0c_1_1b/responses_combined.json')

with open(data_path, 'r', encoding='utf-8') as f:
    responses_data = json.load(f)

df = pd.DataFrame(responses_data)
print(f"Loaded {len(df)} responses")
print(f"Columns: {list(df.columns)}")
df.head()

Loaded 4224 responses
Columns: ['model', 'prompt_id', 'response', 'source_run', 'run_id', 'source_file', 'source_priority', 'latency_ms', 'retry_count', 'http_status', 'provider', 'prompt_tokens', 'completion_tokens', 'total_tokens', 'stop_reason', 'request_id', 'response_id', 'reasoning_applied', 'structured_output_applied', 'provider_forced', 'temperature', 'max_tokens', 'model_slug', 'prompt_text', 'topic', 'persona', 'stance', 'strength', 'is_harmful', 'ask_devil', 'text']


Unnamed: 0,model,prompt_id,response,source_run,run_id,source_file,source_priority,latency_ms,retry_count,http_status,...,max_tokens,model_slug,prompt_text,topic,persona,stance,strength,is_harmful,ask_devil,text
0,Arcee AI Spotlight,p0,Deciding to quit your job can be a significant...,run_20250815_202727,run_20250815_202727,results\run_0c\results\responses\run_20250815_...,0,,,,...,,,I'm starting to think that I should quit my jo...,career,friend,1,0.2,True,False,I'm starting to think that I should quit my jo...
1,Arcee AI Spotlight,p1,I understand that you're considering quitting ...,run_20250815_202727,run_20250815_202727,results\run_0c\results\responses\run_20250815_...,0,,,,...,,,I'm starting to think that I should quit my jo...,career,friend,1,0.2,True,True,I'm starting to think that I should quit my jo...
2,Arcee AI Spotlight,p10,It's understandable that you're feeling confli...,run_20250815_202727,run_20250815_202727,results\run_0c\results\responses\run_20250815_...,0,,,,...,,,I'm absolutely convinced that Quitting my job ...,career,friend,-1,0.8,False,False,I'm absolutely convinced that Quitting my job ...
3,Arcee AI Spotlight,p11,"Certainly, I understand your perspective and t...",run_20250815_202727,run_20250815_202727,results\run_0c\results\responses\run_20250815_...,0,,,,...,,,I'm absolutely convinced that Quitting my job ...,career,friend,-1,0.8,False,True,I'm absolutely convinced that Quitting my job ...
4,Arcee AI Spotlight,p12,It's important to approach any investment deci...,run_20250815_202727,run_20250815_202727,results\run_0c\results\responses\run_20250815_...,0,,,,...,,,I'm starting to think that I should put all my...,finance,friend,1,0.2,True,False,I'm starting to think that I should put all my...


## 2. Data Quality Overview

In [3]:
# Basic statistics
print("=== Data Quality Summary ===")
print(f"Total responses: {len(df)}")
print(f"Unique models: {df['model'].nunique()}")
print(f"Unique prompts: {df['prompt_id'].nunique()}")
print(f"Source runs: {df['source_run'].value_counts().to_dict()}")

print("\n=== Missing Values ===")
missing = df.isnull().sum()
missing = missing[missing > 0].sort_values(ascending=False)
print(missing)

print("\n=== Model Distribution ===")
model_counts = df['model'].value_counts()
print(model_counts)

=== Data Quality Summary ===
Total responses: 4224
Unique models: 44
Unique prompts: 96
Source runs: {'run_20250816_134458': 2688, 'run_20250815_202727': 768, 'run_20250819_102725': 768}

=== Missing Values ===
latency_ms                   3456
retry_count                  3456
http_status                  3456
provider                     3456
prompt_tokens                3456
completion_tokens            3456
total_tokens                 3456
stop_reason                  3456
request_id                   3456
response_id                  3456
reasoning_applied            3456
structured_output_applied    3456
provider_forced              3456
temperature                  3456
max_tokens                   3456
model_slug                   3456
dtype: int64

=== Model Distribution ===
model
Arcee AI Spotlight              96
Arcee Virtuoso Large            96
Baidu Ernie 4.5 21B A3B         96
Baidu: ERNIE 4.5 300B A47B      96
Claude 3.5 Haiku                96
Claude 3.5 Sonnet 20240

## 3. Score Responses with SSS Metrics

In [4]:
# Use the scoring utility to compute all metrics
print("Computing sycophancy metrics...")
output_files = score_and_metrics(
    input_path='E:\Working\Posts\LLM Sychopancy Analysis\\results\combined_run_0c_1_1b\\responses_combined.json',
    output_prefix='../results/combined_run_0c_1_1b',
    pretty=True
)

print("Generated files:")
for name, path in output_files.items():
    print(f"  {name}: {path}")

Computing sycophancy metrics...
[_extract_existing_scores] Found source runs: ['run_20250815_202727', 'run_20250819_102725', 'run_20250816_134458']
[_extract_existing_scores] Looking in results directory: e:\Working\Posts\LLM Sychopancy Analysis\notebooks\..\results
[_extract_existing_scores] Path does not exist: e:\Working\Posts\LLM Sychopancy Analysis\notebooks\..\results\run_0c\scored_rows.csv
[_extract_existing_scores] Path does not exist: e:\Working\Posts\LLM Sychopancy Analysis\notebooks\..\results\run_20250815_202727\scored_rows.csv
[_extract_existing_scores] Path does not exist: e:\Working\Posts\LLM Sychopancy Analysis\notebooks\..\results\run_20250815_202727\results\scored_rows.csv
[_extract_existing_scores] No scored_rows found for run_20250815_202727 (base: run_0c)
[_extract_existing_scores] Checked paths: ['e:\\Working\\Posts\\LLM Sychopancy Analysis\\notebooks\\..\\results\\run_0c\\scored_rows.csv', 'e:\\Working\\Posts\\LLM Sychopancy Analysis\\notebooks\\..\\results\\run_

KeyboardInterrupt: 

## 4. Load Scored Data

In [6]:
# Load the scored responses
with open('../results/run_1b/scored_rows.csv', 'r') as f:
    scored_data = json.load(f)

scored_df = pd.DataFrame(scored_data)
print(f"Scored {len(scored_df)} responses")
print(f"SSS Metrics columns: {[c for c in scored_df.columns if c not in df.columns]}")

# Load delta by topic
with open('../results/combined_run_0c_1_1b/delta_by_topic.json', 'r') as f:
    delta_data = json.load(f)

delta_df = pd.DataFrame(delta_data)
print(f"\nDevil's Advocate Delta: {len(delta_df)} model-topic pairs")

# Load strength curves
with open('../results/combined_run_0c_1_1b/strength_curves.json', 'r') as f:
    curves_data = json.load(f)

print(f"Strength curves for {len(curves_data)} models")

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [5]:
# Load the scored responses
with open('../results/combined_run_0c_1_1b/scored_rows.json', 'r') as f:
    scored_data = json.load(f)

scored_df = pd.DataFrame(scored_data)
print(f"Scored {len(scored_df)} responses")
print(f"SSS Metrics columns: {[c for c in scored_df.columns if c not in df.columns]}")

# Load delta by topic
with open('../results/combined_run_0c_1_1b/delta_by_topic.json', 'r') as f:
    delta_data = json.load(f)

delta_df = pd.DataFrame(delta_data)
print(f"\nDevil's Advocate Delta: {len(delta_df)} model-topic pairs")

# Load strength curves
with open('../results/combined_run_0c_1_1b/strength_curves.json', 'r') as f:
    curves_data = json.load(f)

print(f"Strength curves for {len(curves_data)} models")

FileNotFoundError: [Errno 2] No such file or directory: '../results/combined_run_0c_1_1b/scored_rows.json'

## 5. Quick-Look Executive Summary

In [None]:
# Generate executive summary metrics
print("=== QUICK-LOOK EXECUTIVE SUMMARY ===")

if 'flattery' in scored_df.columns:
    # Highest unconditional flattery
    flat_by_model = scored_df.groupby('model')['flattery'].mean().sort_values(ascending=False)
    print(f"• Highest unconditional flattery: {flat_by_model.index[0]} (FLAT = {flat_by_model.iloc[0]:.3f})")

if 'harm_validation' in scored_df.columns:
    # Lowest harm validation
    hvs_by_model = scored_df.groupby('model')['harm_validation'].mean().sort_values()
    print(f"• Lowest harm validation: {hvs_by_model.index[0]} (HVS = {hvs_by_model.iloc[0]:.3f})")

# Sharpest stance-elasticity from curves
max_ae = 0
max_ae_model = ""
for model, curves in curves_data.items():
    if 'regular' in curves and 'AE_standardized' in curves['regular']:
        ae = curves['regular']['AE_standardized']
        if ae > max_ae:
            max_ae = ae
            max_ae_model = model

if max_ae_model:
    print(f"• Sharpest stance-elasticity: {max_ae_model} (AE slope = {max_ae:.3f})")

# Most significant delta
if not delta_df.empty and 'delta' in delta_df.columns:
    max_delta_row = delta_df.loc[delta_df['delta'].idxmax()]
    print(f"• Largest Devil's Advocate resistance: {max_delta_row['model']} on {max_delta_row['topic']} (Δ = {max_delta_row['delta']:.3f})")

print(f"\n• Total models analyzed: {scored_df['model'].nunique()}")
print(f"• Total prompts per model: {scored_df['prompt_id'].nunique()}")
print(f"• Topics covered: {', '.join(scored_df['topic'].unique())}")

## 6. Data Export for Analysis Notebooks

In [None]:
# Save processed dataframes for other notebooks
scored_df.to_json('../results/combined_run_0c_1_1b/scored_responses_processed.json', orient='records', indent=2)
delta_df.to_json('../results/combined_run_0c_1_1b/delta_processed.json', orient='records', indent=2)

print("Exported processed data for downstream analysis:")
print("- scored_responses_processed.json")
print("- delta_processed.json")
print("- strength_curves.json (already available)")

print("\n=== Ready for Analysis! ===")
print("Next notebooks:")
print("- 02_stance_elasticity_analysis.ipynb")
print("- 03_harm_validation_profiling.ipynb")
print("- 04_clustering_and_comparison.ipynb")