<a href="https://colab.research.google.com/github/leosammallahti/AnalysisCoLab/blob/main/Another_copy_of_Chinese_Philosophy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Looking for CSV at: /content/drive/MyDrive/Chinese Philosophers/chinese_philosophers_quotes_corrected.csv
File exists: True
Loaded 1162 quotes from CSV
Cache hits: 1162; to process: 0
All items are cached. To force re-run, set LLM_RESUME=0 or clear cache at:
  /content/drive/MyDrive/Chinese Philosophers/cache/qwen_multidim
Saved:
  /content/drive/MyDrive/Chinese Philosophers/per_quote_qwen_multidim.jsonl
  /content/drive/MyDrive/Chinese Philosophers/per_quote_qwen_multidim.csv
Estimated cost for 1162 quotes (avg in 300, out 220): $0.2231 (in $0.0697 + out $0.1534)


In [None]:
# ============================================
# SECTION 1: INITIAL SETUP AND IMPORTS
# ============================================

# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Core imports
import pandas as pd
import numpy as np
import json
import warnings
warnings.filterwarnings('ignore')

# Visualization imports
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt
import seaborn as sns

# Machine learning imports
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import umap
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Set visualization defaults
# These apply to seaborn/matplotlib figures only; the plotly figures built in
# the analysis cells set their own template and size explicitly.
sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (12, 8)
plt.rcParams['font.size'] = 11

# Define paths
# NOTE: the trailing slash is required — the loader helpers build file paths
# by appending the file name directly to this string.
BASE_PATH = '/content/drive/MyDrive/Chinese Philosophers/'
print(f"✓ Setup complete. Base path: {BASE_PATH}")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
✓ Setup complete. Base path: /content/drive/MyDrive/Chinese Philosophers/


In [None]:
# ============================================
# SECTION 2: HELPER FUNCTIONS FOR DATA LOADING & PROCESSING
# ============================================

def load_embeddings_and_metadata(base_path=BASE_PATH):
    """Load the quote embedding matrix and its per-quote metadata table.

    Args:
        base_path: Directory prefix holding ``quote_embeddings.npy`` and
            ``quote_metadata.csv``. File names are appended directly, so the
            prefix must end with a path separator.

    Returns:
        ``(embeddings, metadata)`` on success, or ``(None, None)`` if loading
        fails for any reason (the error is printed, not raised).
    """
    try:
        # Load the numpy embeddings
        embeddings = np.load(f'{base_path}quote_embeddings.npy')

        # Load the metadata
        metadata = pd.read_csv(f'{base_path}quote_metadata.csv')

        print(f"✓ Loaded embeddings: shape {embeddings.shape}")
        print(f"✓ Loaded metadata: {len(metadata)} rows")
        print(f"  Philosophers: {metadata['philosopher'].value_counts().to_dict()}")

        # Callers pair embedding row i with metadata row i (boolean masks built
        # from metadata are applied to the embedding array), so a length
        # mismatch would silently misalign or break the analyses. Warn loudly
        # but still return the data so the caller can decide what to do.
        if len(embeddings) != len(metadata):
            print(f"⚠️ Row count mismatch: {len(embeddings)} embeddings vs "
                  f"{len(metadata)} metadata rows")

        return embeddings, metadata
    except Exception as e:
        print(f"❌ Error loading embeddings/metadata: {e}")
        return None, None

def load_csv_safe(filename, base_path=BASE_PATH):
    """Read ``base_path + filename`` as a CSV without ever raising.

    Args:
        filename: Name of the CSV file, appended directly to ``base_path``.
        base_path: Directory prefix; expected to end with a path separator.

    Returns:
        The loaded DataFrame, or ``None`` if reading failed (the error is
        printed).
    """
    try:
        csv_path = f'{base_path}{filename}'
        frame = pd.read_csv(csv_path)
        n_rows = len(frame)
        n_cols = len(frame.columns)
        print(f"✓ Loaded {filename}: {n_rows} rows, {n_cols} columns")
    except Exception as e:
        print(f"❌ Error loading {filename}: {e}")
        return None
    return frame

def parse_semicolon_field(field_value):
    """Split a ';'-delimited value into a list of trimmed, non-empty items.

    Missing values (anything ``pd.isna`` reports as NA) give an empty list.
    Non-string values are converted with ``str()`` before splitting.
    """
    if pd.isna(field_value):
        return []
    trimmed_parts = (part.strip() for part in str(field_value).split(';'))
    return [part for part in trimmed_parts if part]

def parse_json_field(field_value):
    """Parse a JSON string field, returning an empty list if missing or invalid.

    Args:
        field_value: Usually a JSON-encoded string read from a CSV cell. A
            value that is already a ``list`` or ``dict`` is returned unchanged.

    Returns:
        The decoded JSON value, or ``[]`` when the input is NA, is not valid
        JSON, or is of a type ``json.loads`` cannot accept.
    """
    # Already-decoded containers: return as-is. Passing them to pd.isna would
    # yield an array whose truth value is ambiguous and raise ValueError.
    if isinstance(field_value, (list, dict)):
        return field_value
    if pd.isna(field_value):
        return []
    try:
        return json.loads(field_value)
    except (TypeError, ValueError):
        # ValueError covers json.JSONDecodeError (malformed text); TypeError
        # covers non-string inputs such as numbers. Anything else (e.g.
        # KeyboardInterrupt) is deliberately allowed to propagate.
        return []

def compute_philosopher_centroids(embeddings, metadata):
    """Return a dict mapping each philosopher to the mean of their embeddings.

    Rows of ``embeddings`` are matched to rows of ``metadata`` by position;
    the ``philosopher`` column of ``metadata`` decides which rows are averaged
    together. A confirmation line is printed for every philosopher.
    """
    labels = metadata['philosopher']
    centroids = {}

    for name in labels.unique():
        # Boolean row selection, then average down the rows (axis 0)
        rows_for_name = embeddings[labels == name]
        centroids[name] = rows_for_name.mean(axis=0)

        print(f"✓ Computed centroid for {name}: shape {centroids[name].shape}")

    return centroids

def find_bridge_quotes(embeddings, metadata, n_bridges=10):
    """Find quotes from each philosopher closest to the other's centroid.

    Args:
        embeddings: Embedding matrix whose rows line up positionally with the
            rows of ``metadata``.
        metadata: DataFrame with a ``philosopher`` column containing
            'Confucius' and 'Mozi'.
        n_bridges: Maximum number of quotes to return per direction. Zero or
            a negative number yields empty result frames.

    Returns:
        Dict with keys ``'Confucius_to_Mozi'`` and ``'Mozi_to_Confucius'``.
        Each value is a copy of the source philosopher's metadata rows, most
        similar first, with an added ``similarity_to_other`` column holding
        the cosine similarity to the other philosopher's centroid.
    """
    centroids = compute_philosopher_centroids(embeddings, metadata)

    # Clamp so that n_bridges == 0 really means "none": the previous slice
    # [-n_bridges:] becomes [-0:] == [0:] and would select every quote.
    n_keep = max(int(n_bridges), 0)

    bridges = {}
    for source_phil, target_phil in [('Confucius', 'Mozi'), ('Mozi', 'Confucius')]:
        # Get source philosopher's quotes
        source_mask = metadata['philosopher'] == source_phil
        source_embeddings = embeddings[source_mask]
        source_metadata = metadata[source_mask]

        # Compute similarities to target centroid (reshaped to a 1-row matrix)
        target_centroid = centroids[target_phil].reshape(1, -1)
        similarities = cosine_similarity(source_embeddings, target_centroid).flatten()

        # argsort is ascending: reverse it, then keep the first n_keep entries
        top_indices = similarities.argsort()[::-1][:n_keep]

        bridge_df = source_metadata.iloc[top_indices].copy()
        bridge_df['similarity_to_other'] = similarities[top_indices]

        bridges[f"{source_phil}_to_{target_phil}"] = bridge_df

    return bridges

# Test the loading functions
print("=" * 50)
print("TESTING HELPER FUNCTIONS")
print("=" * 50)

# Test embedding loading
embeddings, metadata = load_embeddings_and_metadata()

# Test CSV loading
quotes_df = load_csv_safe('chinese_philosophers_quotes_corrected.csv')
analysis_df = load_csv_safe('per_quote_qwen_multidim.csv')

# The loaders report failure by returning None rather than raising, so check
# the results before claiming success; later cells need all four objects.
load_results = {
    'embeddings': embeddings,
    'metadata': metadata,
    'quotes_df': quotes_df,
    'analysis_df': analysis_df,
}
failed_loads = [name for name, value in load_results.items() if value is None]

if failed_loads:
    print(f"\n❌ Failed to load: {', '.join(failed_loads)}")
else:
    print("\n✓ All helper functions defined and tested!")

TESTING HELPER FUNCTIONS
✓ Loaded embeddings: shape (1162, 384)
✓ Loaded metadata: 1162 rows
  Philosophers: {'Mozi': 634, 'Confucius': 528}
✓ Loaded chinese_philosophers_quotes_corrected.csv: 1162 rows, 5 columns
✓ Loaded per_quote_qwen_multidim.csv: 1090 rows, 19 columns

✓ All helper functions defined and tested!


In [None]:
# ============================================
# ANALYSIS 1: EMBEDDING SPACE PHILOSOPHICAL TERRITORIES
# ============================================

print("ANALYSIS 1: Philosophical Territories in Embedding Space")
print("=" * 60)

# Project the embeddings onto two dimensions, once with UMAP and once with PCA
print("Computing dimensionality reductions...")

# UMAP projection (chosen for preserving local structure)
umap_reducer = umap.UMAP(n_components=2, random_state=42, n_neighbors=30, min_dist=0.1)
embeddings_umap = umap_reducer.fit_transform(embeddings)

# PCA projection (chosen for preserving global structure)
pca_reducer = PCA(n_components=2, random_state=42)
embeddings_pca = pca_reducer.fit_transform(embeddings)

# Explained variance is only defined for the linear (PCA) projection
pca_variance_total = pca_reducer.explained_variance_ratio_.sum()
print("✓ UMAP variance preserved: Not applicable (non-linear)")
print(f"✓ PCA variance explained: {pca_variance_total:.2%}")

# Visualization frame: a copy of the metadata plus both sets of 2-D coordinates
viz_df = metadata.assign(
    UMAP_1=embeddings_umap[:, 0],
    UMAP_2=embeddings_umap[:, 1],
    PCA_1=embeddings_pca[:, 0],
    PCA_2=embeddings_pca[:, 1],
)

# Build one figure with two panels: UMAP on the left, PCA on the right
fig = make_subplots(
    rows=1, cols=2,
    subplot_titles=('UMAP Projection', 'PCA Projection'),
    horizontal_spacing=0.15
)

# Color mapping
colors = {'Confucius': '#1f77b4', 'Mozi': '#ff7f0e'}

# Each panel is described by (coordinate column prefix, subplot column,
# whether its traces own the legend entries). Only the first panel shows
# legend entries so each philosopher is listed once.
panel_specs = [('UMAP', 1, True), ('PCA', 2, False)]

for coord_prefix, panel_col, owns_legend in panel_specs:
    for philosopher in ['Confucius', 'Mozi']:
        phil_data = viz_df[viz_df['philosopher'] == philosopher]

        # Hover label: philosopher name plus the first 100 characters of the quote
        hover_labels = [
            f"{who}<br>Quote: {quote[:100]}..."
            for who, quote in zip(phil_data['philosopher'], phil_data['text'])
        ]
        point_style = dict(
            color=colors[philosopher],
            size=6,
            opacity=0.7,
            line=dict(width=0.5, color='white')
        )

        fig.add_trace(
            go.Scatter(
                x=phil_data[f'{coord_prefix}_1'],
                y=phil_data[f'{coord_prefix}_2'],
                mode='markers',
                name=philosopher,
                marker=point_style,
                text=hover_labels,
                hovertemplate='%{text}<extra></extra>',
                legendgroup=philosopher,
                showlegend=owns_legend
            ),
            row=1, col=panel_col
        )

fig.update_layout(
    title_text="<b>Philosophical Territories in Semantic Space</b><br><sup>Do Mozi and Confucius occupy distinct regions of thought?</sup>",
    title_x=0.5,
    height=600,
    width=1200,
    template='plotly_white',
    hovermode='closest',
    legend=dict(
        yanchor="top",
        y=0.99,
        xanchor="left",
        x=0.01
    )
)

# Axis titles; the PCA axes also report the variance each component explains
pc1_share = pca_reducer.explained_variance_ratio_[0]
pc2_share = pca_reducer.explained_variance_ratio_[1]

fig.update_xaxes(title_text="UMAP Dimension 1", row=1, col=1)
fig.update_yaxes(title_text="UMAP Dimension 2", row=1, col=1)
fig.update_xaxes(title_text=f"PCA Dimension 1 ({pc1_share:.1%} var)", row=1, col=2)
fig.update_yaxes(title_text=f"PCA Dimension 2 ({pc2_share:.1%} var)", row=1, col=2)

fig.show()

# Overlap statistics for each 2-D projection
print("\n📊 TERRITORIAL ANALYSIS:")
print("-" * 40)

# Border regions = quotes whose nearest neighbours come from both philosophers
from sklearn.neighbors import NearestNeighbors

# Philosopher label of every quote, in row order, for positional lookup
philosopher_labels = metadata['philosopher'].values

for projection_name, projection_data in [('UMAP', embeddings_umap), ('PCA', embeddings_pca)]:
    # Ask for 11 neighbours: the query point itself plus its 10 nearest quotes
    nbrs = NearestNeighbors(n_neighbors=11).fit(projection_data)
    distances, indices = nbrs.kneighbors(projection_data)

    # Drop the first column (the point itself) and look up each neighbour's label
    neighbor_labels = philosopher_labels[indices[:, 1:]]
    confucius_neighbors = (neighbor_labels == 'Confucius').sum(axis=1)
    mozi_neighbors = (neighbor_labels == 'Mozi').sum(axis=1)

    # A quote is on the border when the minority philosopher supplies at
    # least 3 of its 10 neighbours (i.e. at least 30%)
    minority_neighbors = np.minimum(confucius_neighbors, mozi_neighbors)
    border_quotes = np.flatnonzero(minority_neighbors >= 3).tolist()

    border_percentage = len(border_quotes) / len(metadata) * 100
    if border_percentage > 30:
        overlap_label = 'significant overlap'
    elif border_percentage > 15:
        overlap_label = 'moderate overlap'
    else:
        overlap_label = 'distinct territories'

    print(f"\n{projection_name} Results:")
    print(f"  • Border quotes (mixed neighborhoods): {len(border_quotes)} ({border_percentage:.1f}%)")
    print(f"  • Suggests {overlap_label}")

print("\n💡 PHILOSOPHICAL INSIGHT:")
print("If the philosophers occupy distinct regions with minimal overlap,")
print("it suggests fundamentally different conceptual frameworks.")
print("Border quotes represent potential common ground or transition ideas.")

ANALYSIS 1: Philosophical Territories in Embedding Space
Computing dimensionality reductions...
✓ UMAP variance preserved: Not applicable (non-linear)
✓ PCA variance explained: 13.89%



📊 TERRITORIAL ANALYSIS:
----------------------------------------

UMAP Results:
  • Border quotes (mixed neighborhoods): 57 (4.9%)
  • Suggests distinct territories

PCA Results:
  • Border quotes (mixed neighborhoods): 239 (20.6%)
  • Suggests moderate overlap

💡 PHILOSOPHICAL INSIGHT:
If the philosophers occupy distinct regions with minimal overlap,
it suggests fundamentally different conceptual frameworks.
Border quotes represent potential common ground or transition ideas.


In [None]:
# ============================================
# ANALYSIS 2: CONCEPT FREQUENCY RADAR CHARTS
# ============================================

print("ANALYSIS 2: Philosophical Fingerprints via Concept Radar Charts")
print("=" * 60)

# Attach the concept columns from the analysis table to every metadata row
concept_columns = ['row_id', 'core_concepts_primary', 'core_concepts_secondary']
merged_df = metadata.merge(analysis_df[concept_columns], on='row_id', how='left')

# Gather primary concepts, both overall and per philosopher
all_primary_concepts = []
philosopher_concepts = {'Confucius': [], 'Mozi': []}

for who, raw_concepts in zip(merged_df['philosopher'], merged_df['core_concepts_primary']):
    if pd.isna(raw_concepts):
        continue
    concepts = parse_semicolon_field(raw_concepts)
    all_primary_concepts.extend(concepts)
    philosopher_concepts[who].extend(concepts)

# Tally how often each philosopher mentions each concept
from collections import Counter

confucius_counts = Counter(philosopher_concepts['Confucius'])
mozi_counts = Counter(philosopher_concepts['Mozi'])

# The 15 concepts mentioned most often across both philosophers combined
all_concept_counts = Counter(all_primary_concepts)
top_concepts = [name for name, _count in all_concept_counts.most_common(15)]

confucius_total = len(philosopher_concepts['Confucius'])
mozi_total = len(philosopher_concepts['Mozi'])

print(f"✓ Found {len(set(all_primary_concepts))} unique primary concepts")
print(f"✓ Confucius total concept mentions: {confucius_total}")
print(f"✓ Mozi total concept mentions: {mozi_total}")

# Radar chart input: each top concept's share (in %) of a philosopher's
# total concept mentions, so the two corpora are comparable despite
# differing sizes
radar_data = [
    {
        'concept': concept.replace('_', ' ').title(),
        'Confucius': confucius_counts.get(concept, 0) / confucius_total * 100,
        'Mozi': mozi_counts.get(concept, 0) / mozi_total * 100,
    }
    for concept in top_concepts
]

radar_df = pd.DataFrame(radar_data)

# Overlay both philosophers on a single radar (polar) chart
fig = go.Figure()

# (trace name / radar_df column, outline colour, translucent fill colour)
radar_styles = [
    ('Confucius', '#1f77b4', 'rgba(31, 119, 180, 0.3)'),
    ('Mozi', '#ff7f0e', 'rgba(255, 127, 14, 0.3)'),
]

for trace_name, outline_color, shade_color in radar_styles:
    fig.add_trace(go.Scatterpolar(
        r=radar_df[trace_name].values,
        theta=radar_df['concept'].values,
        fill='toself',
        name=trace_name,
        line_color=outline_color,
        fillcolor=shade_color,
        hovertemplate='%{theta}<br>Frequency: %{r:.1f}%<extra></extra>'
    ))

# Radial axis runs from 0 to 10% beyond the largest plotted frequency
largest_frequency = max(radar_df['Confucius'].max(), radar_df['Mozi'].max())

fig.update_layout(
    polar=dict(
        radialaxis=dict(
            visible=True,
            range=[0, largest_frequency * 1.1],
            ticksuffix='%'
        )
    ),
    showlegend=True,
    title={
        'text': "<b>Philosophical Fingerprints: Core Concept Frequencies</b><br><sup>What does each philosopher emphasize?</sup>",
        'x': 0.5,
        'xanchor': 'center'
    },
    height=700,
    width=900,
    template='plotly_white'
)

fig.show()

# Diverging bar chart: makes the per-concept gap easier to read than the radar
print("\n📊 CONCEPT EMPHASIS COMPARISON:")
print("-" * 40)

# Signed gap (positive means Mozi's share is larger) and its magnitude
radar_df['Difference'] = radar_df['Mozi'] - radar_df['Confucius']
radar_df['Abs_Difference'] = radar_df['Difference'].abs()
radar_df_sorted = radar_df.sort_values('Difference')

fig2 = go.Figure()

# Orange bars where Mozi emphasizes more, blue bars otherwise
colors = []
for gap in radar_df_sorted['Difference']:
    colors.append('#ff7f0e' if gap > 0 else '#1f77b4')

bar_labels = [f"{magnitude:.1f}%" for magnitude in radar_df_sorted['Abs_Difference']]

fig2.add_trace(go.Bar(
    y=radar_df_sorted['concept'],
    x=radar_df_sorted['Difference'],
    orientation='h',
    marker_color=colors,
    text=bar_labels,
    textposition='outside',
    hovertemplate='%{y}<br>Difference: %{x:.1f}%<br>(Positive = Mozi emphasizes more)<extra></extra>'
))

difference_title = {
    'text': "<b>Concept Emphasis Differences</b><br><sup>← Confucius emphasizes more | Mozi emphasizes more →</sup>",
    'x': 0.5,
    'xanchor': 'center'
}

fig2.update_layout(
    title=difference_title,
    xaxis_title="Difference in Frequency (%)",
    yaxis_title="",
    height=600,
    width=900,
    template='plotly_white',
    xaxis=dict(zeroline=True, zerolinewidth=2, zerolinecolor='black'),
    showlegend=False
)

fig2.show()

# Text summary of the concept analysis
print("\nTop concepts by philosopher:")

# (heading to print, that philosopher's concept counter, key into philosopher_concepts)
top5_sections = [
    ("\n🔷 CONFUCIUS's Top 5 Concepts:", confucius_counts, 'Confucius'),
    ("\n🔶 MOZI's Top 5 Concepts:", mozi_counts, 'Mozi'),
]

for heading, concept_counter, who in top5_sections:
    print(heading)
    total_mentions = len(philosopher_concepts[who])
    for concept, count in concept_counter.most_common(5):
        freq = count / total_mentions * 100
        print(f"  • {concept.replace('_', ' ').title()}: {freq:.1f}%")

# Concepts used by only one philosopher versus concepts used by both
confucius_vocab = set(confucius_counts.keys())
mozi_vocab = set(mozi_counts.keys())
unique_confucius = confucius_vocab - mozi_vocab
unique_mozi = mozi_vocab - confucius_vocab

print(f"\n💡 PHILOSOPHICAL INSIGHT:")
print(f"  • Confucius has {len(unique_confucius)} unique concepts")
print(f"  • Mozi has {len(unique_mozi)} unique concepts")
print(f"  • Shared concepts: {len(confucius_vocab & mozi_vocab)}")
print("\nThe radar chart reveals each philosopher's 'conceptual DNA' -")
print("what ideas they return to repeatedly in their teachings.")

ANALYSIS 2: Philosophical Fingerprints via Concept Radar Charts
✓ Found 1027 unique primary concepts
✓ Confucius total concept mentions: 1246
✓ Mozi total concept mentions: 1579



📊 CONCEPT EMPHASIS COMPARISON:
----------------------------------------



Top concepts by philosopher:

🔷 CONFUCIUS's Top 5 Concepts:
  • Virtue: 5.6%
  • Junzi: 5.4%
  • Propriety: 4.9%
  • Benevolence: 4.0%
  • Ren: 3.9%

🔶 MOZI's Top 5 Concepts:
  • Yi: 3.7%
  • Defensive Military Strategy: 2.5%
  • Meritocracy: 2.3%
  • Universal Love: 2.2%
  • Ren: 1.9%

💡 PHILOSOPHICAL INSIGHT:
  • Confucius has 323 unique concepts
  • Mozi has 622 unique concepts
  • Shared concepts: 82

The radar chart reveals each philosopher's 'conceptual DNA' -
what ideas they return to repeatedly in their teachings.


In [None]:
# ============================================
# ANALYSIS 3: RHETORICAL STRATEGY COMPARISON
# ============================================

print("ANALYSIS 3: Rhetorical Strategies - How They Argue")
print("=" * 60)

# Attach the rhetorical columns to the metadata, then keep only the quotes
# that actually have an argument method recorded
rhetorical_columns = ['row_id', 'argument_method', 'argument_devices', 'argument_structure']
rhetorical_df = (
    metadata
    .merge(analysis_df[rhetorical_columns], on='row_id', how='left')
    .dropna(subset=['argument_method'])
)
print(f"✓ Analyzing {len(rhetorical_df)} quotes with rhetorical data")

# 1. ARGUMENT METHODS COMPARISON
print("\n📊 ARGUMENT METHODS:")
print("-" * 40)

# `argument_method` can hold several ';'-separated methods for one quote
# (e.g. "Empirical Observation; Technical Specification"). Counting the raw
# strings would treat every distinct combination as its own method, so each
# value is split first and every method is tallied individually.
# Frequency = percentage of the philosopher's quotes that use the method;
# because a quote may use several methods, the percentages can sum to > 100.
method_comparison = []
for philosopher in ['Confucius', 'Mozi']:
    phil_data = rhetorical_df[rhetorical_df['philosopher'] == philosopher]
    method_counts = (
        phil_data['argument_method']
        # dict.fromkeys de-duplicates while keeping order, so a method that is
        # listed twice for the same quote is still counted once for that quote
        .map(lambda value: list(dict.fromkeys(parse_semicolon_field(value))))
        .explode()
        .value_counts()
    )
    method_freq = (method_counts / len(phil_data) * 100).to_dict()

    for method, freq in method_freq.items():
        method_comparison.append({
            'Philosopher': philosopher,
            'Method': method.replace('_', ' ').title(),
            'Frequency': freq,
            'Count': method_counts[method]
        })

# Naming the columns keeps the frame usable even when no methods were found
method_df = pd.DataFrame(
    method_comparison,
    columns=['Philosopher', 'Method', 'Frequency', 'Count']
)

# Create grouped bar chart for argument methods
fig_methods = go.Figure()

for philosopher in ['Confucius', 'Mozi']:
    phil_methods = method_df[method_df['Philosopher'] == philosopher]
    color = '#1f77b4' if philosopher == 'Confucius' else '#ff7f0e'

    fig_methods.add_trace(go.Bar(
        name=philosopher,
        x=phil_methods['Method'],
        y=phil_methods['Frequency'],
        marker_color=color,
        text=[f"{freq:.1f}%" for freq in phil_methods['Frequency']],
        textposition='outside',
        hovertemplate='%{x}<br>Frequency: %{y:.1f}%<br>Count: %{customdata}<extra></extra>',
        customdata=phil_methods['Count']
    ))

fig_methods.update_layout(
    title="<b>Argument Methods: How They Make Their Points</b>",
    xaxis_title="Argument Method",
    yaxis_title="Frequency (%)",
    barmode='group',
    height=500,
    width=1000,
    template='plotly_white',
    legend=dict(x=0.85, y=0.95)
)

fig_methods.show()

# 2. ARGUMENT DEVICES ANALYSIS
print("\n📊 RHETORICAL DEVICES:")
print("-" * 40)

# Split every ';'-separated device list and pool the devices overall and
# per philosopher
all_devices = []
philosopher_devices = {'Confucius': [], 'Mozi': []}

for who, raw_devices in zip(rhetorical_df['philosopher'], rhetorical_df['argument_devices']):
    if pd.isna(raw_devices):
        continue
    devices = parse_semicolon_field(raw_devices)
    all_devices.extend(devices)
    philosopher_devices[who].extend(devices)

# Tally device usage
from collections import Counter
confucius_devices = Counter(philosopher_devices['Confucius'])
mozi_devices = Counter(philosopher_devices['Mozi'])

# The 10 devices used most often across both philosophers combined
all_device_counts = Counter(all_devices)
top_devices = [name for name, _count in all_device_counts.most_common(10)]

# Each device's share (in %) of a philosopher's total device mentions, plus
# the signed gap (positive means Mozi's share is larger)
confucius_device_total = len(philosopher_devices['Confucius'])
mozi_device_total = len(philosopher_devices['Mozi'])

device_comparison = []
for device in top_devices:
    conf_count = confucius_devices.get(device, 0)
    mozi_count = mozi_devices.get(device, 0)
    conf_share = conf_count / confucius_device_total * 100
    mozi_share = mozi_count / mozi_device_total * 100

    device_comparison.append({
        'Device': device.replace('_', ' ').title(),
        'Confucius': conf_share,
        'Mozi': mozi_share,
        'Difference': mozi_share - conf_share
    })

device_df = pd.DataFrame(device_comparison).sort_values('Difference')

# Diverging bar chart: orange where Mozi uses the device more, blue otherwise
fig_devices = go.Figure()

colors = []
for gap in device_df['Difference']:
    colors.append('#ff7f0e' if gap > 0 else '#1f77b4')

device_bar_labels = [f"{abs(gap):.1f}%" for gap in device_df['Difference']]

fig_devices.add_trace(go.Bar(
    y=device_df['Device'],
    x=device_df['Difference'],
    orientation='h',
    marker_color=colors,
    text=device_bar_labels,
    textposition='outside',
    hovertemplate='%{y}<br>Difference: %{x:.1f}%<br>(Positive = Mozi uses more)<extra></extra>'
))

device_title = {
    'text': "<b>Rhetorical Device Preferences</b><br><sup>← Confucius uses more | Mozi uses more →</sup>",
    'x': 0.5,
    'xanchor': 'center'
}

fig_devices.update_layout(
    title=device_title,
    xaxis_title="Difference in Usage (%)",
    yaxis_title="",
    height=500,
    width=900,
    template='plotly_white',
    xaxis=dict(zeroline=True, zerolinewidth=2, zerolinecolor='black')
)

fig_devices.show()

# 3. ARGUMENT STRUCTURE PATTERNS
print("\n📊 ARGUMENT STRUCTURES:")
print("-" * 40)

# Philosopher x structure count table; combinations that never occur become 0
structure_sizes = rhetorical_df.groupby(['philosopher', 'argument_structure']).size()
structure_pivot = structure_sizes.unstack(fill_value=0)

# Convert each philosopher's row of counts into percentages of that row's total
structure_row_totals = structure_pivot.sum(axis=1)
structure_pct = structure_pivot.div(structure_row_totals, axis=0) * 100

# Heatmap inputs: readable column labels and a pre-formatted label per cell
structure_labels = structure_pct.columns.str.replace('_', ' ').str.title()
structure_cell_text = [
    [f"{val:.1f}%" for val in pct_row]
    for pct_row in structure_pct.values
]

fig_structure = go.Figure(data=go.Heatmap(
    z=structure_pct.values,
    x=structure_labels,
    y=structure_pct.index,
    colorscale='RdBu_r',
    text=structure_cell_text,
    texttemplate="%{text}",
    textfont={"size": 12},
    colorbar=dict(title="Frequency (%)")
))

fig_structure.update_layout(
    title="<b>Argument Structure Preferences</b>",
    xaxis_title="Structure Type",
    yaxis_title="Philosopher",
    height=400,
    width=900,
    template='plotly_white'
)

fig_structure.show()

# Summary statistics
print("\n💡 RHETORICAL INSIGHTS:")
print("-" * 40)

for philosopher in ['Confucius', 'Mozi']:
    phil_data = rhetorical_df[rhetorical_df['philosopher'] == philosopher]

    # Most common method. `argument_method` may list several ';'-separated
    # methods for one quote, so split before counting; otherwise a combined
    # string such as "A; B" is reported as if it were a single method.
    # De-duplicating per quote makes the percentage "share of quotes using it".
    method_counts = (
        phil_data['argument_method']
        .map(lambda value: list(dict.fromkeys(parse_semicolon_field(value))))
        .explode()
        .value_counts()
    )

    # Most common structure (value_counts computed once and reused)
    structure_counts = phil_data['argument_structure'].value_counts()

    # Device variety
    unique_devices = set()
    for devices in phil_data['argument_devices'].dropna():
        unique_devices.update(parse_semicolon_field(devices))

    print(f"\n{philosopher}:")

    # Guard against a philosopher with no recorded values: indexing position 0
    # of an empty value_counts result would raise IndexError.
    if method_counts.empty:
        print("  • Primary method: n/a")
    else:
        top_method_name = method_counts.index[0]
        top_method = method_counts.iloc[0]
        print(f"  • Primary method: {top_method_name.replace('_', ' ').title()} ({top_method/len(phil_data)*100:.1f}%)")

    if structure_counts.empty:
        print("  • Primary structure: n/a")
    else:
        top_structure_name = structure_counts.index[0]
        top_structure = structure_counts.iloc[0]
        print(f"  • Primary structure: {top_structure_name.replace('_', ' ').title()} ({top_structure/len(phil_data)*100:.1f}%)")

    print(f"  • Rhetorical device variety: {len(unique_devices)} unique devices")

print("\n🔍 INTERPRETATION:")
print("These patterns reveal HOW each philosopher persuades:")
print("• Methods show their logical approach (analogies vs principles)")
print("• Devices reveal their rhetorical toolkit")
print("• Structures indicate how they organize arguments")

ANALYSIS 3: Rhetorical Strategies - How They Argue
✓ Analyzing 1057 quotes with rhetorical data

📊 ARGUMENT METHODS:
----------------------------------------



📊 RHETORICAL DEVICES:
----------------------------------------



📊 ARGUMENT STRUCTURES:
----------------------------------------



💡 RHETORICAL INSIGHTS:
----------------------------------------

Confucius:
  • Primary method: Exemplification (5.2%)
  • Primary structure: Assertion With Qualification (2.7%)
  • Rhetorical device variety: 277 unique devices

Mozi:
  • Primary method: Empirical Observation; Technical Specification (3.2%)
  • Primary structure: Problem-Solution (6.5%)
  • Rhetorical device variety: 307 unique devices

🔍 INTERPRETATION:
These patterns reveal HOW each philosopher persuades:
• Methods show their logical approach (analogies vs principles)
• Devices reveal their rhetorical toolkit
• Structures indicate how they organize arguments


In [None]:
# ============================================
# ANALYSIS 4: METAPHOR SOURCE DOMAIN ANALYSIS
# ============================================

print("ANALYSIS 4: Metaphor Source Domains - Where They Find Meaning")
print("=" * 60)

# Merge to get metaphor data
# Left join on row_id: every metadata row is kept, and quotes with no matching
# row in analysis_df end up with NaN in `metaphors_json`.
metaphor_df = metadata.merge(
    analysis_df[['row_id', 'metaphors_json']],
    on='row_id',
    how='left'
)

# Parse metaphors and extract source domains
def extract_metaphor_domains(metaphor_json):
    """Extract source domains from a JSON-encoded collection of metaphors.

    Args:
        metaphor_json: JSON text, normally a list whose items are either
            plain strings or dicts carrying a ``'domain'`` key. A single
            top-level string or dict is treated as a one-item list.

    Returns:
        A list with one entry per usable metaphor: the first whitespace-
        separated word of a string metaphor (``'unknown'`` if it has no
        words), or the ``'domain'`` value of a dict metaphor. Items of any
        other kind are skipped. Missing or unparseable input gives ``[]``.
    """
    if pd.isna(metaphor_json):
        return []

    try:
        metaphors = json.loads(metaphor_json)
    except (TypeError, ValueError):
        # ValueError covers malformed JSON; TypeError covers non-string input
        return []

    # Normalise to a list so a lone string is not iterated character by
    # character and a lone dict is not iterated key by key.
    if isinstance(metaphors, (str, dict)):
        metaphors = [metaphors]
    elif not isinstance(metaphors, list):
        return []

    domains = []
    for metaphor in metaphors:
        if isinstance(metaphor, str):
            # Simple heuristic: first word often indicates the domain.
            # A blank or whitespace-only string has no words at all, so test
            # the split result rather than the string's truthiness.
            words = metaphor.split()
            domains.append(words[0] if words else 'unknown')
        elif isinstance(metaphor, dict) and 'domain' in metaphor:
            domains.append(metaphor['domain'])
    return domains

# Categorize metaphors into broader domains
# Domains are tested in the order listed here and the first match wins.
domain_categories = {
    'Nature': ['water', 'river', 'mountain', 'tree', 'plant', 'animal', 'bird', 'fish',
               'wind', 'sky', 'earth', 'stone', 'jade', 'gold', 'season', 'weather'],
    'Governance': ['ruler', 'king', 'minister', 'state', 'government', 'law', 'decree',
                   'territory', 'border', 'army', 'soldier', 'war', 'peace'],
    'Crafts': ['craft', 'tool', 'wheel', 'cart', 'boat', 'building', 'construction',
               'weaving', 'pottery', 'smith', 'carpenter', 'artisan', 'skill'],
    'Family': ['father', 'mother', 'son', 'daughter', 'brother', 'sister', 'ancestor',
               'family', 'parent', 'child', 'elder', 'younger'],
    'Body': ['heart', 'mind', 'body', 'hand', 'eye', 'ear', 'mouth', 'face', 'head',
             'foot', 'blood', 'bone', 'health', 'illness'],
    'Agriculture': ['farm', 'field', 'crop', 'harvest', 'seed', 'grain', 'rice', 'soil',
                    'plow', 'cultivation', 'farmer', 'agriculture'],
    'Learning': ['teacher', 'student', 'master', 'disciple', 'learning', 'knowledge',
                 'wisdom', 'book', 'study', 'practice', 'cultivation'],
    'Journey': ['path', 'road', 'way', 'journey', 'travel', 'destination', 'bridge',
                'gate', 'door', 'step', 'walking', 'distance']
}

# Keywords are matched against whole words, not substrings. Substring matching
# misfiles many metaphors: "learning" contains "ear" (Body), "walking"
# contains "king" (Governance), "selfish" contains "fish" (Nature), and so on.
# Each keyword is accepted in its listed form or with a simple "s"/"es"
# plural; other inflections (e.g. "farming" for "farm") intentionally no
# longer match.
domain_word_forms = {
    domain: {form
             for keyword in keywords
             for form in (keyword, keyword + 's', keyword + 'es')}
    for domain, keywords in domain_categories.items()
}

# Collect all metaphors by philosopher
philosopher_metaphors = {'Confucius': [], 'Mozi': []}
philosopher_domain_counts = {'Confucius': {d: 0 for d in domain_categories.keys()},
                            'Mozi': {d: 0 for d in domain_categories.keys()}}

for _, row in metaphor_df.iterrows():
    # Rows for philosophers we are not tracking, or without metaphor data,
    # are skipped (as before, they never contribute to the counts).
    if row['philosopher'] not in philosopher_metaphors or pd.isna(row['metaphors_json']):
        continue

    try:
        metaphors = json.loads(row['metaphors_json'])
    except (TypeError, ValueError):
        # Malformed JSON or a non-string cell: skip this row only, without
        # hiding unrelated errors the way a bare `except` would.
        continue

    # A lone string/dict is one metaphor; anything else non-list is unusable
    if isinstance(metaphors, (str, dict)):
        metaphors = [metaphors]
    elif not isinstance(metaphors, list):
        continue
    if not metaphors:  # Check if list is not empty
        continue

    philosopher_metaphors[row['philosopher']].extend(metaphors)
    domain_counts = philosopher_domain_counts[row['philosopher']]

    # Categorize each metaphor
    for metaphor in metaphors:
        metaphor_lower = str(metaphor).lower()
        # Break the text into alphabetic words (every other character acts
        # as a separator)
        metaphor_words = set(
            ''.join(ch if ch.isalpha() else ' ' for ch in metaphor_lower).split()
        )
        matched_domain = next(
            (domain for domain, forms in domain_word_forms.items()
             if metaphor_words & forms),
            'Other'  # no keyword of any domain occurs in the metaphor
        )
        # 'Other' is created on first use, so it only exists when needed
        domain_counts[matched_domain] = domain_counts.get(matched_domain, 0) + 1

# Build the per-domain table that the bubble chart is drawn from
confucius_domain_counts = philosopher_domain_counts['Confucius']
mozi_domain_counts = philosopher_domain_counts['Mozi']

domain_data = []
for domain in [*domain_categories.keys(), 'Other']:
    conf_count = confucius_domain_counts.get(domain, 0)
    mozi_count = mozi_domain_counts.get(domain, 0)

    # Leave out domains that neither philosopher draws on
    if not (conf_count > 0 or mozi_count > 0):
        continue

    domain_data.append({
        'Domain': domain,
        'Confucius': conf_count,
        'Mozi': mozi_count,
        'Total': conf_count + mozi_count
    })

# Smallest total first
domain_df = pd.DataFrame(domain_data).sort_values('Total', ascending=True)

# Create bubble chart
fig_bubble = go.Figure()

# Calculate positions for bubble chart: one column per philosopher, one row
# per domain, with the domain name printed between the two columns
import numpy as np
x_positions = {'Confucius': 1, 'Mozi': 3}
y_scale = np.linspace(0, len(domain_df)-1, len(domain_df))
bubble_colors = {'Confucius': '#1f77b4', 'Mozi': '#ff7f0e'}

# Philosophers that do not have a legend entry yet. Tying the legend to the
# first row (i == 0) loses a philosopher's entry whenever that row has no
# bubble for them, which is likely because rows are sorted smallest-first.
legend_pending = {'Confucius', 'Mozi'}

for i, row in enumerate(domain_df.itertuples()):
    for bubble_owner in ['Confucius', 'Mozi']:
        bubble_count = getattr(row, bubble_owner)
        if bubble_count > 0:
            fig_bubble.add_trace(go.Scatter(
                x=[x_positions[bubble_owner]],
                y=[y_scale[i]],
                mode='markers+text',
                marker=dict(
                    # sqrt scaling makes bubble area, not diameter,
                    # proportional to the count
                    size=np.sqrt(bubble_count) * 15,
                    color=bubble_colors[bubble_owner],
                    opacity=0.7,
                    line=dict(color='white', width=2)
                ),
                text=f"{bubble_count}",
                textposition="middle center",
                textfont=dict(color='white', size=10),
                name=bubble_owner,
                # One legend entry stands for all of a philosopher's bubbles,
                # so group them: clicking the entry toggles every bubble.
                legendgroup=bubble_owner,
                showlegend=bubble_owner in legend_pending,
                hovertemplate=f"{row.Domain}<br>Count: {bubble_count}<extra></extra>"
            ))
            legend_pending.discard(bubble_owner)

    # Domain label
    fig_bubble.add_trace(go.Scatter(
        x=[2],
        y=[y_scale[i]],
        mode='text',
        text=row.Domain,
        textposition="middle center",
        textfont=dict(size=12, color='black'),
        showlegend=False,
        hoverinfo='skip'
    ))

fig_bubble.update_layout(
    title={
        'text': "<b>Metaphor Source Domains</b><br><sup>What imagery do they use to explain their ideas?</sup>",
        'x': 0.5,
        'xanchor': 'center'
    },
    xaxis=dict(
        range=[0, 4],
        showticklabels=False,
        showgrid=False,
        zeroline=False
    ),
    yaxis=dict(
        showticklabels=False,
        showgrid=False,
        zeroline=False
    ),
    height=600,
    width=900,
    template='plotly_white',
    annotations=[
        dict(x=1, y=-1.5, text="<b>Confucius</b>", showarrow=False, font=dict(size=14, color='#1f77b4')),
        dict(x=3, y=-1.5, text="<b>Mozi</b>", showarrow=False, font=dict(size=14, color='#ff7f0e'))
    ]
)

fig_bubble.show()

# Create stacked percentage bar chart
# Every domain bar is split into the share contributed by each philosopher.
# (count column, percentage column written to domain_df, bar colour)
bar_specs = [
    ('Confucius', 'Conf_pct', '#1f77b4'),
    ('Mozi', 'Mozi_pct', '#ff7f0e'),
]

# Percentage of each domain's total that belongs to the philosopher
for series_name, pct_column, bar_color in bar_specs:
    domain_df[pct_column] = domain_df[series_name].div(domain_df['Total']).mul(100)

fig_stacked = go.Figure()

# One horizontal bar series per philosopher, stacked by the layout below
for series_name, pct_column, bar_color in bar_specs:
    fig_stacked.add_trace(go.Bar(
        y=domain_df['Domain'],
        x=domain_df[pct_column],
        name=series_name,
        orientation='h',
        marker_color=bar_color,
        hovertemplate=f'%{{y}}<br>{series_name}: %{{x:.1f}}%<extra></extra>'
    ))

stacked_layout = dict(
    title="<b>Metaphor Domain Distribution</b><br><sup>Relative preference for each source domain</sup>",
    xaxis_title="Percentage of Domain Usage",
    yaxis_title="",
    barmode='stack',
    height=500,
    width=900,
    template='plotly_white',
    legend=dict(x=0.85, y=0.95),
)
fig_stacked.update_layout(**stacked_layout)

fig_stacked.show()

# Print insights: totals, each philosopher's three most used domains, and the
# domains one philosopher clearly dominates.
print("\n💡 METAPHORICAL INSIGHTS:")
print("-" * 40)

print("\nTotal metaphors found:")
for philosopher in ['Confucius', 'Mozi']:
    print(f"  • {philosopher}: {len(philosopher_metaphors[philosopher])} metaphors")

print("\nTop 3 domains by philosopher:")
for philosopher in ['Confucius', 'Mozi']:
    print(f"\n{philosopher}:")
    counts_for_philosopher = philosopher_domain_counts[philosopher]
    sorted_domains = sorted(counts_for_philosopher.items(),
                            key=lambda item: item[1], reverse=True)
    for domain, count in sorted_domains[:3]:
        if count <= 0:
            continue
        # Share of this philosopher's categorised metaphors
        pct = count / sum(counts_for_philosopher.values()) * 100
        print(f"  • {domain}: {count} metaphors ({pct:.1f}%)")

# Find distinctive domains: more than 70% of a domain's metaphors from one side
print("\n🔍 DISTINCTIVE PREFERENCES:")
for domain in domain_df['Domain'].values:
    row = domain_df.loc[domain_df['Domain'] == domain].iloc[0]
    if row['Total'] < 5:  # Only consider domains with enough data
        continue
    if row['Conf_pct'] > 70:
        print(f"  • {domain}: Strongly preferred by Confucius ({row['Conf_pct']:.0f}%)")
    elif row['Mozi_pct'] > 70:
        print(f"  • {domain}: Strongly preferred by Mozi ({row['Mozi_pct']:.0f}%)")

interpretation_notes = [
    "\n📚 INTERPRETATION:",
    "Metaphor domains reveal how each philosopher sees the world:",
    "• Nature metaphors → organic, harmonious worldview",
    "• Governance metaphors → political, hierarchical thinking",
    "• Craft metaphors → practical, skill-based approach",
    "• Family metaphors → relational, social emphasis",
]
for note in interpretation_notes:
    print(note)

ANALYSIS 4: Metaphor Source Domains - Where They Find Meaning



💡 METAPHORICAL INSIGHTS:
----------------------------------------

Total metaphors found:
  • Confucius: 401 metaphors
  • Mozi: 884 metaphors

Top 3 domains by philosopher:

Confucius:
  • Other: 210 metaphors (52.4%)
  • Nature: 63 metaphors (15.7%)
  • Governance: 32 metaphors (8.0%)

Mozi:
  • Other: 463 metaphors (52.4%)
  • Nature: 139 metaphors (15.7%)
  • Governance: 82 metaphors (9.3%)

🔍 DISTINCTIVE PREFERENCES:
  • Crafts: Strongly preferred by Mozi (81%)
  • Governance: Strongly preferred by Mozi (72%)

📚 INTERPRETATION:
Metaphor domains reveal how each philosopher sees the world:
• Nature metaphors → organic, harmonious worldview
• Governance metaphors → political, hierarchical thinking
• Craft metaphors → practical, skill-based approach
• Family metaphors → relational, social emphasis


In [None]:
# ============================================
# ANALYSIS 5: PHILOSOPHICAL BRIDGE FINDER (FIXED V2)
# ============================================

print("ANALYSIS 5: Finding Common Ground Through Bridge Quotes")
print("=" * 60)

# Find bridge quotes using the helper function.
# `bridges` is iterated below as {"<source>_to_<target>": DataFrame}.
bridges = find_bridge_quotes(embeddings, metadata, n_bridges=10)

# Since metadata already has row_id and text, we can use it directly.
# Extra per-quote annotations are pulled from analysis_df when it has them.
concept_cols = ['row_id', 'core_concepts_primary', 'themes', 'argument_method']
concepts_available = [col for col in concept_cols if col in analysis_df.columns]

# The annotations are only usable when the join key AND at least one annotation
# column exist. Without 'row_id' the later merge(on='row_id') raises KeyError;
# with only 'row_id' there is nothing to merge or count.
if 'row_id' in concepts_available and len(concepts_available) > 1:
    concepts_for_merge = analysis_df[concepts_available].copy()
else:
    concepts_for_merge = None

print("\n🌉 BRIDGE QUOTES - Finding Common Ground")
print("=" * 60)

# Keys look like "<source>_to_<target>"; each value holds that source
# philosopher's quotes together with their similarity to the other side.
for bridge_type, bridge_df in bridges.items():
    name_parts = bridge_type.split('_to_')
    source_phil, target_phil = name_parts[0], name_parts[1]

    print(f"\n📖 {source_phil} quotes closest to {target_phil}'s thinking:")
    print("-" * 50)

    # Start from the bridge rows and attach concept columns when available
    bridge_full = bridge_df.copy()
    if concepts_for_merge is not None:
        bridge_full = bridge_full.merge(concepts_for_merge, on='row_id', how='left')

    # Display top 5 with analysis
    for rank, row in enumerate(bridge_full.head(5).itertuples(), start=1):
        print(f"\n{rank}. Similarity Score: {row.similarity_to_other:.3f}")

        # Quote text, shortened to at most 300 characters
        raw_text = getattr(row, 'text', None)
        quote = raw_text if pd.notna(raw_text) else "Text not available"
        if len(quote) > 300:
            quote = quote[:297] + "..."
        print(f"   Quote: \"{quote}\"")

        # Source reference: work title plus chapter/verse when present
        work_title = getattr(row, 'work', None)
        if pd.notna(work_title):
            source_info = f"{work_title}"
            verse_ref = getattr(row, 'chapter_verse', None)
            if pd.notna(verse_ref):
                source_info += f" ({verse_ref})"
            print(f"   Source: {source_info}")

        # Up to five primary concepts from the merged annotation columns
        primary_concepts = getattr(row, 'core_concepts_primary', None)
        if pd.notna(primary_concepts):
            concepts = parse_semicolon_field(primary_concepts)[:5]
            if concepts:
                print(f"   Key Concepts: {', '.join(concepts)}")

# Create visualization of bridge quotes in embedding space
print("\n\n📊 VISUALIZING BRIDGE QUOTES IN SEMANTIC SPACE")
print("-" * 50)

# Get UMAP coordinates (reuse from Analysis 1 if available, or compute new).
# Reuse is only safe when BOTH pieces survive in the kernel: the projected
# frame with every column the plot reads, and the fitted reducer that the
# centroid step calls .transform() on. Checking for viz_df alone left
# umap_reducer undefined whenever viz_df came from an earlier cell.
_plot_columns = {'philosopher', 'row_id', 'text', 'UMAP_1', 'UMAP_2'}
_projection_ready = (
    'viz_df' in globals()
    and 'umap_reducer' in globals()
    and hasattr(viz_df, 'columns')
    and _plot_columns.issubset(viz_df.columns)
)

if not _projection_ready:
    # Refit so the point coordinates and the reducer are guaranteed to match.
    umap_reducer = umap.UMAP(n_components=2, random_state=42, n_neighbors=30, min_dist=0.1)
    embeddings_umap = umap_reducer.fit_transform(embeddings)
    viz_df = metadata.copy()
    viz_df['UMAP_1'] = embeddings_umap[:, 0]
    viz_df['UMAP_2'] = embeddings_umap[:, 1]

# Create interactive plot highlighting bridge quotes
fig = go.Figure()


def _hover_labels(frame, limit, prefix=""):
    """Build one hover string per row: '<prefix><philosopher><br><text[:limit]>...'.

    Rows whose text is missing get a placeholder; slicing the raw value
    directly raised TypeError on NaN.
    """
    labels = []
    for philosopher_name, text in zip(frame['philosopher'], frame['text']):
        if pd.notna(text):
            snippet = f"{str(text)[:limit]}..."
        else:
            snippet = "Text not available"
        labels.append(f"{prefix}{philosopher_name}<br>{snippet}")
    return labels


# Row ids of every bridge quote, in either direction. This does not depend on
# the philosopher being plotted, so it is built once instead of once per loop.
bridge_ids = set()
for bridge_df in bridges.values():
    bridge_ids.update(bridge_df['row_id'].values)

# Plot all quotes in gray first
for philosopher in ['Confucius', 'Mozi']:
    phil_data = viz_df[viz_df['philosopher'] == philosopher]

    # Non-bridge quotes (background)
    non_bridge_mask = ~phil_data['row_id'].isin(bridge_ids)
    non_bridge_data = phil_data[non_bridge_mask]

    fig.add_trace(go.Scatter(
        x=non_bridge_data['UMAP_1'],
        y=non_bridge_data['UMAP_2'],
        mode='markers',
        name=f'{philosopher} (regular)',
        marker=dict(
            color='lightgray',
            size=4,
            opacity=0.3
        ),
        hovertemplate='%{text}<extra></extra>',
        text=_hover_labels(non_bridge_data, 100),
        showlegend=True
    ))

# Highlight bridge quotes
colors = {'Confucius_to_Mozi': '#1f77b4', 'Mozi_to_Confucius': '#ff7f0e'}
symbols = {'Confucius_to_Mozi': 'circle', 'Mozi_to_Confucius': 'square'}

for bridge_type, bridge_df in bridges.items():
    # Get UMAP coordinates for bridge quotes
    bridge_viz = viz_df[viz_df['row_id'].isin(bridge_df['row_id'])]
    source_phil = bridge_type.split('_to_')[0]

    fig.add_trace(go.Scatter(
        x=bridge_viz['UMAP_1'],
        y=bridge_viz['UMAP_2'],
        mode='markers',
        name=f'{source_phil} bridges',
        marker=dict(
            color=colors[bridge_type],
            size=12,
            opacity=0.8,
            symbol=symbols[bridge_type],
            line=dict(width=2, color='white')
        ),
        text=_hover_labels(bridge_viz, 150, prefix="BRIDGE: "),
        hovertemplate='%{text}<extra></extra>'
    ))

# Add centroids as large markers. The embedding-space centroids are pushed
# through the already fitted reducer so they land in the same 2-D space as the
# points; this needs umap_reducer to be the reducer that produced viz_df.
centroids = compute_philosopher_centroids(embeddings, metadata)
centroids_2d = umap_reducer.transform(np.array([centroids['Confucius'], centroids['Mozi']]))

fig.add_trace(go.Scatter(
    x=centroids_2d[:, 0],
    y=centroids_2d[:, 1],
    mode='markers+text',
    name='Centroids',
    marker=dict(
        color=['#1f77b4', '#ff7f0e'],
        size=20,
        symbol='star',
        line=dict(width=2, color='white')
    ),
    text=['Confucius Center', 'Mozi Center'],
    textposition='top center',
    showlegend=True
))

fig.update_layout(
    title={
        'text': "<b>Bridge Quotes: Philosophical Common Ground</b><br><sup>Highlighted quotes are closest to the other philosopher's thinking</sup>",
        'x': 0.5,
        'xanchor': 'center'
    },
    xaxis_title="UMAP Dimension 1",
    yaxis_title="UMAP Dimension 2",
    height=700,
    width=1000,
    template='plotly_white'
)

fig.show()

# Analyze common themes in bridge quotes
from collections import Counter  # not among the setup cell's imports, so imported here

print("\n💡 BRIDGE QUOTE ANALYSIS - What Unites Them?")
print("-" * 50)


def _collect_terms(frame, column):
    """Return every parsed term from `column`, in row order.

    Returns an empty list when the column is absent. concepts_for_merge only
    carries the annotation columns that analysis_df actually had, so indexing
    a column unconditionally raised KeyError when one was missing.
    """
    if column not in frame.columns:
        return []
    terms = []
    for value in frame[column].dropna():
        terms.extend(parse_semicolon_field(value))
    return terms


if concepts_for_merge is not None and 'row_id' in concepts_for_merge.columns:
    # Row ids of all bridge quotes, both directions
    all_bridge_ids = []
    for bridge_df in bridges.values():
        all_bridge_ids.extend(bridge_df['row_id'].values)

    bridge_concepts_df = concepts_for_merge[concepts_for_merge['row_id'].isin(all_bridge_ids)]

    # Collect all concepts and themes from bridge quotes
    bridge_concepts = _collect_terms(bridge_concepts_df, 'core_concepts_primary')
    bridge_themes = _collect_terms(bridge_concepts_df, 'themes')

    concept_counts = Counter(bridge_concepts)
    theme_counts = Counter(bridge_themes)

    if concept_counts:
        print("\nMost common concepts in bridge quotes:")
        for concept, count in concept_counts.most_common(5):
            print(f"  • {concept}: appears in {count} bridge quotes")

    if theme_counts:
        print("\nMost common themes in bridge quotes:")
        for theme, count in theme_counts.most_common(5):
            print(f"  • {theme}: appears in {count} bridge quotes")

# Closing interpretation of the bridge-quote analysis, emitted line by line
bridge_interpretation = [
    "\n🔍 INTERPRETATION:",
    "-" * 50,
    "Bridge quotes reveal surprising common ground between the philosophers.",
    "These are the ideas where their thinking converges, suggesting",
    "universal principles or shared cultural values that transcend",
    "their philosophical differences. They may represent:",
    "• Shared cultural foundations",
    "• Universal human concerns",
    "• Points where their philosophies complement rather than oppose",
]
for interpretation_line in bridge_interpretation:
    print(interpretation_line)

ANALYSIS 5: Finding Common Ground Through Bridge Quotes
✓ Computed centroid for Confucius: shape (384,)
✓ Computed centroid for Mozi: shape (384,)

🌉 BRIDGE QUOTES - Finding Common Ground

📖 Confucius quotes closest to Mozi's thinking:
--------------------------------------------------

1. Similarity Score: 0.661
   Quote: "'En Ch'eng Tzu killed Duke Chien. After washing himself ceremonially, Confucius went to court and reported to Duke Ai, saying, ‘He, Ch'en Heng has killed his lord. May I request that an army be sent to punish him?' The Duke answered, ‘Tell the three noble lords.' Confucius said, ‘I have reported ..."
   Source: Analects (14:21)
   Key Concepts: duty, ritual_purification, feudal_hierarchy

2. Similarity Score: 0.649
   Quote: "The Master said, 'I cannot say.' Meng Wu Po repeated the question. The Master said, 'Yu can be given the responsibility of managing the military levies in a state of a thousand chariots, but whether he is benevolent or not I cannot say.' [2] 'W


💡 BRIDGE QUOTE ANALYSIS - What Unites Them?
--------------------------------------------------

Most common concepts in bridge quotes:
  • benevolence: appears in 3 bridge quotes
  • yi: appears in 3 bridge quotes
  • impartial_caring: appears in 3 bridge quotes
  • ritual: appears in 2 bridge quotes
  • the_way: appears in 2 bridge quotes

Most common themes in bridge quotes:
  • ethical_governance: appears in 4 bridge quotes
  • ritual_practice: appears in 2 bridge quotes
  • political_ethics: appears in 2 bridge quotes
  • social_order: appears in 2 bridge quotes
  • moral_character: appears in 1 bridge quotes

🔍 INTERPRETATION:
--------------------------------------------------
Bridge quotes reveal surprising common ground between the philosophers.
These are the ideas where their thinking converges, suggesting
universal principles or shared cultural values that transcend
their philosophical differences. They may represent:
• Shared cultural foundations
• Universal human concerns
•

In [None]:
# ============================================
# FIX NOTEBOOK FOR COLAB GITHUB SAVE
# ============================================

import IPython
from google.colab import files

print("Fixing notebook for Colab's GitHub save...")
print("=" * 50)

# Ask the browser to strip per-cell widget metadata.
# The script runs in the front end, asynchronously from this Python cell, and
# only acts when a classic `Jupyter.notebook` object exists on the page.
# NOTE(review): Colab's front end is not the classic Notebook UI, so the
# cleanup branch is probably skipped there -- check the browser console.
IPython.get_ipython().run_cell_magic('javascript', '', '''
// Remove problematic widget metadata
if (window.Jupyter && window.Jupyter.notebook) {
    var cells = Jupyter.notebook.get_cells();
    cells.forEach(function(cell) {
        if (cell.metadata && cell.metadata.widgets) {
            delete cell.metadata.widgets;
        }
    });
    Jupyter.notebook.save_checkpoint();
    console.log("Cleaned widget metadata from all cells");
} else {
    console.warn("Jupyter.notebook is not available in this front end; no widget metadata was changed");
}
''')

# Python cannot see whether the script above did anything, so report only that
# it was dispatched instead of claiming the metadata is gone.
print("\n✅ Cleanup script sent to the browser.")
print("   It only changes the notebook if the front end exposes Jupyter.notebook;")
print("   the browser console shows whether metadata was cleaned or left untouched.")
print("\n📝 INSTRUCTIONS:")
print("1. Run this cell")
print("2. Wait 2-3 seconds for the cleanup to complete")
print("3. Use File → Save a copy in GitHub")
print("4. It should now save without errors")
print("\nNote: Your visualizations and code are preserved,")
print("just the problematic metadata is removed.")

# Alternative if you want to verify the fix worked
print("\n" + "=" * 50)
print("Optional: Verify the cleanup worked")
print("=" * 50)
print("After running this cell, you can check:")
print("• File → Save a copy in GitHub")
print("• If it still shows an error, run this cell again")
print("• The Plotly charts will still work when you rerun the notebook")

Fixing notebook for Colab's GitHub save...


<IPython.core.display.Javascript object>


✅ Widget metadata removed!

📝 INSTRUCTIONS:
1. Run this cell
2. Wait 2-3 seconds for the cleanup to complete
3. Use File → Save a copy in GitHub
4. It should now save without errors

Note: Your visualizations and code are preserved,
just the problematic metadata is removed.

Optional: Verify the cleanup worked
After running this cell, you can check:
• File → Save a copy in GitHub
• If it still shows an error, run this cell again
• The Plotly charts will still work when you rerun the notebook


In [None]:
# ============================================
# FIX WIDGET METADATA BY ADDING MISSING STATE
# ============================================

import IPython
from IPython.display import Javascript

print("Adding missing 'state' to widget metadata...")

# Add the missing state key to widget metadata.
# The script runs in the browser, asynchronously from this cell. It needs the
# classic Notebook's RequireJS module "base/js/namespace"; where `require` or
# that module is missing it now logs a warning instead of throwing.
IPython.display.display(Javascript('''
    if (typeof require === "function") {
        require(["base/js/namespace"], function(Jupyter) {
            var cells = Jupyter.notebook.get_cells();
            cells.forEach(function(cell) {
                if (cell.metadata && cell.metadata.widgets) {
                    // Add empty state if it doesn't exist
                    if (!cell.metadata.widgets.state) {
                        cell.metadata.widgets.state = {};
                        console.log("Fixed widget metadata for cell");
                    }
                }
            });
            // Save the notebook
            Jupyter.notebook.save_checkpoint();
            console.log("Notebook saved with fixed metadata");
        }, function(err) {
            console.warn("Could not load base/js/namespace; widget metadata left unchanged", err);
        });
    } else {
        console.warn("require() is not available in this front end; widget metadata left unchanged");
    }
'''))

# Python cannot observe the browser-side result, so do not claim success here.
print("✅ Fix script sent to the browser (adds an empty 'state' key where widget metadata exists)")
print("   Check the browser console to see whether it ran in this front end.")
print("\n📝 NOW TRY:")
print("1. Wait 3 seconds for the fix to complete")
print("2. Go to File → Save a copy in GitHub")
print("3. It should work now!")
print("\nIf it still doesn't work, try:")
print("• File → Save (regular save first)")
print("• Then File → Save a copy in GitHub")

Adding missing 'state' to widget metadata...


<IPython.core.display.Javascript object>

✅ Fixed! Added missing 'state' key to widget metadata

📝 NOW TRY:
1. Wait 3 seconds for the fix to complete
2. Go to File → Save a copy in GitHub
3. It should work now!

If it still doesn't work, try:
• File → Save (regular save first)
• Then File → Save a copy in GitHub
