# Analysis of Efficient Combined 1_1b Results

This notebook analyzes the results from the `efficient_combined_1_1b` run, including:
- SSS (Sycophancy Similarity Scores) analysis
- Network visualization with adjustable parameters
- Model comparison and sycophancy patterns
- Response-level analysis

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import sys

# Add project root to path
project_root = Path().absolute().parent
sys.path.append(str(project_root))

from sycophancy_analysis.visualization import (
    plot_network, create_heatmap, compute_sycophancy_index,
    create_network_sidecar
)
from sycophancy_analysis.scoring import build_sss

# Set up plotting
plt.style.use('default')
sns.set_palette("husl")
%matplotlib inline

ImportError: cannot import name 'create_heatmap' from 'sycophancy_analysis.visualization' (e:\Working\Posts\LLM Sychopancy Analysis\sycophancy_analysis\visualization\__init__.py)

## 1. Load Data

In [None]:
# Locate the CSV outputs of the efficient_combined_1_1b run
results_dir = project_root.joinpath("results", "efficient_combined_1_1b")
sss_scores_path = results_dir / "sss_scores.csv"
scored_rows_path = results_dir / "scored_rows.csv"

# Read both tables into DataFrames (same order as the paths above)
sss_scores, scored_rows = (
    pd.read_csv(csv_path) for csv_path in (sss_scores_path, scored_rows_path)
)

# Report table shapes and row counts in a single print call
print(
    f"SSS Scores shape: {sss_scores.shape}",
    f"Scored Rows shape: {scored_rows.shape}",
    f"\nModels analyzed: {len(sss_scores)}",
    f"Total responses: {len(scored_rows)}",
    sep="\n",
)

## 2. SSS Scores Overview

In [None]:
# Restrict to numeric columns up front; describe() below summarises these
sss_numeric = sss_scores.select_dtypes(include="number")

# Show the SSS score table as loaded
print("SSS Scores Summary:")
display(sss_scores)

# Descriptive statistics for the numeric columns
print("\nSSS Score Statistics:")
display(sss_numeric.describe())

## 3. Network Visualization with Adjustable Parameters

### 3.1 Network Plot Configuration

In [None]:
# Network plot parameters - ADJUST THESE AS NEEDED
# Later cells read these values by key when drawing the network and when
# exporting the metadata sidecar.
network_params = dict(
    figsize=(12, 10),            # Figure size
    node_size_base=300,          # Base node size
    node_size_scale=1000,        # Node size scaling factor
    edge_width_base=0.5,         # Base edge width
    edge_width_scale=3.0,        # Edge width scaling factor
    edge_alpha=0.6,              # Edge transparency
    similarity_threshold=0.1,    # Minimum similarity to show edge
    layout_k=1.0,                # Spring layout spacing parameter
    layout_iterations=50,        # Spring layout iterations
    font_size=10,                # Node label font size
    show_edge_labels=False,      # Whether to show edge weight labels
    community_colors=True,       # Use community-based coloring
    highlight_bridges=True,      # Highlight bridge edges
    bridge_color='red',          # Color for bridge edges
    bridge_width=2.0,            # Width for bridge edges
)

# Echo the active configuration, one "name: value" line per parameter
print("Network visualization parameters:")
for param_name, param_value in network_params.items():
    print(f"  {param_name}: {param_value}")

### 3.2 Build Similarity Matrix and Network

In [None]:
# Derive the SSS result from the response-level rows
print("Building similarity matrix...")
sss_result = build_sss(scored_rows)

# Pull out the three entries the rest of the notebook works with
similarity_matrix, model_names, distance_matrix = (
    sss_result[field]
    for field in ('similarity_matrix', 'model_names', 'distance_matrix')
)

print(f"Similarity matrix shape: {similarity_matrix.shape}")
print(f"Model names: {model_names}")

# Min/max/mean/std are taken over the whole matrix as returned
print(
    f"\nSimilarity matrix statistics:",
    f"  Min: {similarity_matrix.min():.3f}",
    f"  Max: {similarity_matrix.max():.3f}",
    f"  Mean: {similarity_matrix.mean():.3f}",
    f"  Std: {similarity_matrix.std():.3f}",
    sep="\n",
)

### 3.3 Generate Network Plot

In [None]:
# Create the figure and axes the network is drawn on
fig, ax = plt.subplots(figsize=network_params['figsize'])

# Generate network plot.
# - `ax=ax` hands the axes created above to plot_network (the same keyword the
#   alternative-layout cell uses); previously the axes were created but never
#   passed on.
# - `**network_params` forwards every configured option; its keys are exactly
#   the keyword arguments that used to be copied out one by one here.
network_data = plot_network(
    similarity_matrix=similarity_matrix,
    model_names=model_names,
    save_path=None,  # Don't save, just display
    ax=ax,
    **network_params
)

# Title the axes explicitly instead of relying on pyplot's "current axes"
ax.set_title("LLM Sycophancy Network - Efficient Combined 1_1b", fontsize=16, fontweight='bold')
fig.tight_layout()
plt.show()

# Print network statistics
if network_data:
    # ':.3f' is only valid for numbers. Applying it to the 'N/A' fallback (or
    # to None) raises, so format defensively and fall back to plain 'N/A'.
    try:
        modularity_text = f"{network_data.get('modularity'):.3f}"
    except (TypeError, ValueError):
        modularity_text = 'N/A'

    print("\nNetwork Statistics:")
    print(f"  Nodes: {network_data.get('num_nodes', 'N/A')}")
    print(f"  Edges: {network_data.get('num_edges', 'N/A')}")
    print(f"  Communities: {network_data.get('num_communities', 'N/A')}")
    print(f"  Modularity: {modularity_text}")
    print(f"  Bridge edges: {network_data.get('num_bridges', 'N/A')}")

### 3.4 Alternative Network Layouts

Try different layout parameters for comparison:

In [None]:
# Create multiple network plots with different parameters.
# Each entry yields one panel; add or remove entries freely.
layout_variants = [
    {'k': 0.5, 'iterations': 30, 'title': 'Tight Layout (k=0.5)'},
    {'k': 1.5, 'iterations': 50, 'title': 'Loose Layout (k=1.5)'},
    {'k': 2.0, 'iterations': 100, 'title': 'Very Loose Layout (k=2.0)'}
]

# Size the grid from the variant list rather than a hard-coded 3, so the
# number of panels always matches. squeeze=False guarantees an array of axes
# even for a single variant; ravel() keeps `axes` one-dimensional.
n_variants = len(layout_variants)
fig, axes = plt.subplots(1, n_variants, figsize=(6 * n_variants, 6), squeeze=False)
axes = axes.ravel()

for ax, variant in zip(axes, layout_variants):
    # Create network plot with variant parameters on this panel's axes
    plot_network(
        similarity_matrix=similarity_matrix,
        model_names=model_names,
        save_path=None,
        figsize=(6, 6),
        layout_k=variant['k'],
        layout_iterations=variant['iterations'],
        similarity_threshold=network_params['similarity_threshold'],
        ax=ax
    )

    ax.set_title(variant['title'], fontsize=12, fontweight='bold')

fig.tight_layout()
plt.show()

## 4. Similarity Heatmap

In [None]:
# Create similarity heatmap with the project helper when it is available.
# A recorded run of the import cell failed with "cannot import name
# 'create_heatmap'", so look the helper up defensively instead of letting a
# missing name abort this cell before the matplotlib heatmap is drawn.
_create_heatmap = globals().get('create_heatmap')
if callable(_create_heatmap):
    heatmap_chart = _create_heatmap(
        similarity_matrix=similarity_matrix,
        model_names=model_names,
        title="LLM Sycophancy Similarity Heatmap - Efficient Combined 1_1b"
    )

    # Display the heatmap
    heatmap_chart.show()
else:
    print("create_heatmap is not available from sycophancy_analysis.visualization; "
          "showing the matplotlib heatmap only.")

# Matplotlib/seaborn version for more control
heatmap_fig, heatmap_ax = plt.subplots(figsize=(10, 8))
# np.triu marks the upper triangle *including* the diagonal, so only the
# strictly-lower triangle of the matrix is drawn.
mask = np.triu(np.ones_like(similarity_matrix, dtype=bool))
sns.heatmap(
    similarity_matrix,
    mask=mask,
    annot=True,
    fmt='.3f',
    cmap='RdYlBu_r',
    center=0.5,
    xticklabels=model_names,
    yticklabels=model_names,
    cbar_kws={'label': 'Similarity Score'},
    ax=heatmap_ax
)
heatmap_ax.set_title('LLM Sycophancy Similarity Matrix', fontsize=14, fontweight='bold')
plt.setp(heatmap_ax.get_xticklabels(), rotation=45, ha='right')
plt.setp(heatmap_ax.get_yticklabels(), rotation=0)
heatmap_fig.tight_layout()
plt.show()

## 5. Sycophancy Index Analysis

In [None]:
# Per-model Sycophancy Index from the similarity matrix
sycophancy_indices = compute_sycophancy_index(
    similarity_matrix=similarity_matrix,
    model_names=model_names
)

# Tabulate model/index pairs, highest index first
si_df = pd.DataFrame(
    {'Model': model_names, 'Sycophancy_Index': sycophancy_indices}
).sort_values('Sycophancy_Index', ascending=False)

print("Sycophancy Index Rankings:")
display(si_df)

# Bar chart of the ranking, drawn through the explicit Axes interface
si_fig, si_ax = plt.subplots(figsize=(12, 6))
bar_positions = range(len(si_df))
bars = si_ax.bar(
    bar_positions,
    si_df['Sycophancy_Index'],
    color=plt.cm.RdYlBu_r(si_df['Sycophancy_Index'])  # bar colour follows the index value
)
si_ax.set_xlabel('Models')
si_ax.set_ylabel('Sycophancy Index')
si_ax.set_title('Sycophancy Index by Model', fontsize=14, fontweight='bold')
si_ax.set_xticks(bar_positions)
si_ax.set_xticklabels(si_df['Model'], rotation=45, ha='right')
si_ax.grid(axis='y', alpha=0.3)

# Write each bar's value just above its top edge
for bar in bars:
    height = bar.get_height()
    label_x = bar.get_x() + bar.get_width()/2.
    si_ax.text(label_x, height + 0.01,
               f'{height:.3f}', ha='center', va='bottom', fontsize=9)

si_fig.tight_layout()
plt.show()

## 6. Response-Level Analysis

In [None]:
# Headline counts for the response-level table
print(
    "Response-Level Analysis:",
    f"Total responses: {len(scored_rows)}",
    f"Unique models: {scored_rows['model'].nunique()}",
    f"Unique prompts: {scored_rows['prompt_id'].nunique()}",
    sep="\n",
)

# How often each prediction label occurs overall
pred_counts = scored_rows['pred_label'].value_counts()
print("\nPrediction Label Distribution:")
display(pred_counts)

# Model x label count table (absent combinations become 0), shown as stacked bars
pred_by_model = (
    scored_rows
    .groupby(['model', 'pred_label'])
    .size()
    .unstack(fill_value=0)
)
pred_fig, pred_ax = plt.subplots(figsize=(12, 6))
pred_by_model.plot.bar(stacked=True, ax=pred_ax)
pred_ax.set_title('Prediction Label Distribution by Model', fontsize=14, fontweight='bold')
pred_ax.set_xlabel('Model')
pred_ax.set_ylabel('Count')
plt.setp(pred_ax.get_xticklabels(), rotation=45, ha='right')
# Anchor the legend outside the plotting area, to the right
pred_ax.legend(title='Prediction Label', bbox_to_anchor=(1.05, 1), loc='upper left')
pred_fig.tight_layout()
plt.show()

## 7. Sycophancy Behavior Patterns

In [None]:
# Columns of scored_rows treated as behaviour dimensions
behavior_cols = [
    'endorse_stance', 'challenge', 'harm_validation', 'devil_advocate',
    'flattery_density', 'intens_density', 'hedge_density',
    'refusal_markers', 'safe_alt_markers', 'evasion_markers',
]

# Average every behaviour column within each model
behavior_by_model = scored_rows.groupby('model')[behavior_cols].mean()

print("Mean Behavior Scores by Model:")
display(behavior_by_model.round(3))

# Heatmap with behaviours as rows and models as columns (hence the transpose)
behavior_fig, behavior_ax = plt.subplots(figsize=(12, 8))
sns.heatmap(
    behavior_by_model.T,
    annot=True,
    fmt='.3f',
    cmap='RdYlBu_r',
    center=0.25,
    cbar_kws={'label': 'Mean Score'},
    ax=behavior_ax,
)
behavior_ax.set_title('Sycophancy Behavior Patterns by Model', fontsize=14, fontweight='bold')
behavior_ax.set_xlabel('Model')
behavior_ax.set_ylabel('Behavior Dimension')
plt.setp(behavior_ax.get_xticklabels(), rotation=45, ha='right')
behavior_fig.tight_layout()
plt.show()

## 8. Harmful vs Non-Harmful Response Analysis

In [None]:
# Analyze differences between harmful and non-harmful prompts:
# mean of each behaviour column per (model, is_harmful) pair, rounded to 3 dp.
harm_analysis = scored_rows.groupby(['model', 'is_harmful'])[behavior_cols].mean().round(3)

print("Behavior Differences: Harmful vs Non-Harmful Prompts")
display(harm_analysis)

# Slice out each group only when it actually occurs in the index. Calling
# .xs() for a value that is absent raises KeyError, which would abort the cell
# before anything is drawn; a missing group is represented as None instead.
harm_flags_present = harm_analysis.index.get_level_values('is_harmful')
non_harmful = harm_analysis.xs(False, level='is_harmful') if False in harm_flags_present else None
harmful = harm_analysis.xs(True, level='is_harmful') if True in harm_flags_present else None

# Visualize the difference: one heatmap per group, stacked vertically
fig, axes = plt.subplots(2, 1, figsize=(14, 10))

# (axes, data, is_harmful value, title, x-axis label) for each panel
harm_panels = [
    (axes[0], non_harmful, False, 'Non-Harmful Prompts - Behavior Patterns', ''),
    (axes[1], harmful, True, 'Harmful Prompts - Behavior Patterns', 'Model'),
]

for panel_ax, panel_data, panel_flag, panel_title, panel_xlabel in harm_panels:
    if panel_data is None:
        # Nothing to plot for this group: say so instead of failing
        panel_ax.text(0.5, 0.5, f'No rows with is_harmful == {panel_flag}',
                      ha='center', va='center', transform=panel_ax.transAxes)
        panel_ax.set_xticks([])
        panel_ax.set_yticks([])
    else:
        # Behaviours as rows, models as columns (hence the transpose)
        sns.heatmap(panel_data.T, annot=True, fmt='.3f', cmap='RdYlBu_r',
                    center=0.25, ax=panel_ax, cbar_kws={'label': 'Mean Score'})
    panel_ax.set_title(panel_title, fontweight='bold')
    panel_ax.set_xlabel(panel_xlabel)
    panel_ax.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

## 9. Export Network Metadata

In [None]:
# Destination of the metadata sidecar, alongside the run's CSV outputs
metadata_path = results_dir.joinpath("network_analysis.meta.json")

# Pass the matrix, the model names, the target path and every configured
# network parameter to the sidecar helper
network_metadata = create_network_sidecar(
    similarity_matrix=similarity_matrix,
    model_names=model_names,
    save_path=str(metadata_path),
    **network_params
)

print(
    f"Network metadata saved to: {metadata_path}",
    "\nMetadata includes:",
    sep="\n",
)
# List the keys of the returned metadata, if anything truthy came back
for field in (network_metadata.keys() if network_metadata else ()):
    print(f"  - {field}")

## 10. Summary and Insights

In [None]:
# Final roll-up of the figures computed in the earlier sections.
print("=" * 60)
print("ANALYSIS SUMMARY - Efficient Combined 1_1b Results")
print("=" * 60)

print(f"\n📊 Dataset Overview:")
print(f"   • Models analyzed: {len(sss_scores)}")
print(f"   • Total responses: {len(scored_rows)}")
print(f"   • Unique prompts: {scored_rows['prompt_id'].nunique()}")

# si_df is sorted by Sycophancy_Index descending, so head(3) is the top three
print(f"\n🎯 Top Sycophantic Models:")
top_3 = si_df.head(3)
for i, (_, row) in enumerate(top_3.iterrows(), 1):
    print(f"   {i}. {row['Model']}: {row['Sycophancy_Index']:.3f}")

print(f"\n🔗 Network Characteristics:")
if network_data:
    # ':.3f' is only valid for numbers. Applying it to the 'N/A' fallback (or
    # to None) raises, so format defensively and fall back to plain 'N/A'.
    try:
        modularity_text = f"{network_data.get('modularity'):.3f}"
    except (TypeError, ValueError):
        modularity_text = 'N/A'

    print(f"   • Communities detected: {network_data.get('num_communities', 'N/A')}")
    print(f"   • Network modularity: {modularity_text}")
    print(f"   • Bridge connections: {network_data.get('num_bridges', 'N/A')}")

# Shares are means of boolean columns/comparisons, scaled to percentages
print(f"\n📈 Response Patterns:")
agreement_pct = (scored_rows['pred_label'] == 'AGREEMENT').mean() * 100
print(f"   • Agreement responses: {agreement_pct:.1f}%")
harmful_pct = scored_rows['is_harmful'].mean() * 100
print(f"   • Harmful prompts: {harmful_pct:.1f}%")

print(f"\n🔧 Network Parameters Used:")
print(f"   • Similarity threshold: {network_params['similarity_threshold']}")
print(f"   • Layout spacing (k): {network_params['layout_k']}")
print(f"   • Layout iterations: {network_params['layout_iterations']}")

print("\n" + "=" * 60)