In [1]:
from torch.utils.data import DataLoader, Dataset
from typing import List
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
import gc

In [2]:
from einops import einsum

In [3]:
# Read the saved probe-weight tensor from disk and cast it to float32
# (the PCA fit further down converts it to a NumPy array).
probes_weights = torch.load(
    '/workspace/llm-progress-monitor/qwen3_4b_weight_tensor.pt'
).float()

In [5]:
# Fit a PCA basis on the probe weight vectors using sklearn.
# NOTE: despite the name `selected_components`, no subset is picked here: every
# fitted component is kept, and later cells index them by column
# (column 0 -> PC1, column 1 -> PC2, ...).
from sklearn.decomposition import PCA
import numpy as np
import matplotlib.pyplot as plt

N_PCA_COMPONENTS = 9  # number of principal directions fitted and kept

pca = PCA(n_components=N_PCA_COMPONENTS)
pca.fit(probes_weights.detach().cpu().numpy())
# One row per principal direction, moved to the GPU in bfloat16.
# The recorded run printed shape [9, 2560].
selected_components = torch.tensor(pca.components_, dtype=torch.float32).to('cuda', dtype=torch.bfloat16)
print(f"Selected PCA components shape: {selected_components.shape}")

def get_ema_coords(coords, alpha=0.99):
    """Smooth a sequence of coordinate rows with an exponential moving average.

    The first row is passed through unchanged; every later row becomes
    ``alpha * previous_ema + (1 - alpha) * row``, element by element. The same
    ``alpha`` is applied at every step (there is no warm-up schedule).

    Args:
        coords: Tensor (or any object whose ``.tolist()`` yields a list of
            equal-length numeric rows), one row per step.
        alpha: Weight of the running average; ``1 - alpha`` is the weight of
            the incoming row.

    Returns:
        A ``torch.tensor`` built from the smoothed rows, with one output row per
        input row. An empty input yields an empty tensor.
    """
    smoothed_rows = []
    running = None
    # .tolist() yields plain Python floats, so the recurrence below is evaluated
    # in Python arithmetic rather than in the input tensor's dtype.
    for row in coords.tolist():
        if running is None:
            running = row
        else:
            running = [alpha * prev + (1 - alpha) * cur for prev, cur in zip(running, row)]
        smoothed_rows.append(running)
    return torch.tensor(smoothed_rows)

def process_activations(activations):
    """Project activations onto the PCA directions and return smoothed coordinates.

    Args:
        activations: Tensor contracted with the module-level
            ``selected_components`` using the einsum pattern
            ``'s h, n h -> s n'`` (presumably [seq, hidden] -- confirm against
            the saved activation files).

    Returns:
        NumPy array holding the EMA-smoothed projection, one row per position
        along the first axis of ``activations``.
    """
    projected = einsum(activations, selected_components, 's h, n h -> s n')
    return get_ema_coords(projected).detach().cpu().numpy()

# Load activation files 0 .. N_SEQUENCES-1 and turn each one into smoothed PCA coordinates.
N_SEQUENCES = 30
ACTIVATION_KEY = 15  # entry taken from each saved file -- presumably a layer index; TODO confirm

all_coords = []
for i in range(N_SEQUENCES):
    activations = torch.load(f'/workspace/llm-progress-monitor/rollouts/activations/{i}.pt')[ACTIVATION_KEY]
    np_coords = process_activations(activations)
    all_coords.append(np_coords)
    print(f"Processed file {i}, shape: {np_coords.shape}")


Selected PCA components shape: torch.Size([9, 2560])
Processed file 0, shape: (1403, 9)
Processed file 1, shape: (4341, 9)
Processed file 2, shape: (447, 9)
Processed file 3, shape: (498, 9)
Processed file 4, shape: (614, 9)
Processed file 5, shape: (1769, 9)
Processed file 6, shape: (2590, 9)
Processed file 7, shape: (444, 9)
Processed file 8, shape: (1148, 9)
Processed file 9, shape: (1150, 9)
Processed file 10, shape: (343, 9)
Processed file 11, shape: (1707, 9)
Processed file 12, shape: (1651, 9)
Processed file 13, shape: (1717, 9)
Processed file 14, shape: (914, 9)
Processed file 15, shape: (2133, 9)
Processed file 16, shape: (1341, 9)
Processed file 17, shape: (699, 9)
Processed file 18, shape: (1542, 9)
Processed file 19, shape: (1637, 9)
Processed file 20, shape: (353, 9)
Processed file 21, shape: (574, 9)
Processed file 22, shape: (909, 9)
Processed file 23, shape: (541, 9)
Processed file 24, shape: (1580, 9)
Processed file 25, shape: (968, 9)
Processed file 26, shape: (1064, 

In [8]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np

# Project the probe weight vectors onto the same PCA directions used for the
# activations (computed in float32 on the GPU), then return to NumPy.
weights_f32 = probes_weights.to('cuda').to(torch.float32)
components_f32 = selected_components.to(torch.float32)
weight_pca_coords = (
    einsum(weights_f32, components_f32, 'p h, n h -> p n')
    .detach()
    .cpu()
    .numpy()
)
print(f"Weight PCA coordinates shape: {weight_pca_coords.shape}")

# Grid of 3D scenes: 2 rows x 5 columns, one scene per sequence
grid_rows, grid_cols = 2, 5
n_panels = grid_rows * grid_cols
fig = make_subplots(
    rows=grid_rows, cols=grid_cols,
    specs=[[{'type': 'scatter3d'} for _ in range(grid_cols)] for _ in range(grid_rows)],
    subplot_titles=[f'Sequence {k}' for k in range(n_panels)]
)

weight_labels = [str(j) for j in range(len(weight_pca_coords))]

for seq_id in range(n_panels):
    seq_coords = all_coords[seq_id]
    zero_row, zero_col = divmod(seq_id, grid_cols)
    panel_row, panel_col = zero_row + 1, zero_col + 1
    is_first = seq_id == 0  # colorbar and legend entry are shown once only

    # Trajectory: markers coloured by token position, joined by a faint line
    trajectory_marker = dict(
        size=3,
        color=list(range(len(seq_coords))),
        colorscale='viridis',
        showscale=is_first,
        colorbar=dict(title="Token Position", x=1.02, len=0.4) if is_first else None
    )
    trajectory_trace = go.Scatter3d(
        x=seq_coords[:, 0],
        y=seq_coords[:, 1],
        z=seq_coords[:, 2],
        mode='markers+lines',
        marker=trajectory_marker,
        line=dict(color='rgba(0,0,0,0.3)', width=2),
        name='Trajectory',
        showlegend=False
    )
    fig.add_trace(trajectory_trace, row=panel_row, col=panel_col)

    # Projected weight vectors, labelled by their row index, as reference points
    weight_trace = go.Scatter3d(
        x=weight_pca_coords[:, 0],
        y=weight_pca_coords[:, 1],
        z=weight_pca_coords[:, 2],
        mode='markers+text',
        marker=dict(
            size=5,
            color='red',
            symbol='diamond',
            opacity=0.7,
            line=dict(color='darkred', width=1)
        ),
        text=weight_labels,
        textposition='top center',
        textfont=dict(size=8, color='darkred'),
        name='Weight Points',
        showlegend=is_first
    )
    fig.add_trace(weight_trace, row=panel_row, col=panel_col)

fig.update_layout(
    title='3D Trajectories for All Sequences',
    width=1600,
    height=800
)

# Label the axes of every scene
for panel in range(n_panels):
    zero_row, zero_col = divmod(panel, grid_cols)
    fig.update_scenes(
        xaxis_title='PC1',
        yaxis_title='PC2',
        zaxis_title='PC3',
        row=zero_row + 1, col=zero_col + 1
    )

fig.show()

Weight PCA coordinates shape: (20, 9)


In [9]:
# 2D version: Create 2x5 subplots for 2D trajectories (PC1 vs PC2)
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np

# Grid of flat panels (PC1 on x, PC2 on y): 2 rows x 5 columns, one per sequence
grid_rows, grid_cols = 2, 5
n_panels = grid_rows * grid_cols
fig_2d = make_subplots(
    rows=grid_rows, cols=grid_cols,
    subplot_titles=[f'Sequence {k}' for k in range(n_panels)]
)

weight_labels = [str(j) for j in range(len(weight_pca_coords))]

for seq_id in range(n_panels):
    seq_coords = all_coords[seq_id]
    zero_row, zero_col = divmod(seq_id, grid_cols)
    panel_row, panel_col = zero_row + 1, zero_col + 1
    is_first = seq_id == 0  # colorbar and legend entry are shown once only

    # Trajectory in the first two components, coloured by token position
    trajectory_marker = dict(
        size=5,
        color=list(range(len(seq_coords))),
        colorscale='viridis',
        showscale=is_first,
        colorbar=dict(title="Token Position", x=1.02, len=0.4) if is_first else None
    )
    trajectory_trace = go.Scatter(
        x=seq_coords[:, 0],
        y=seq_coords[:, 1],
        mode='markers+lines',
        marker=trajectory_marker,
        line=dict(color='rgba(0,0,0,0.3)', width=2),
        name='Trajectory',
        showlegend=False
    )
    fig_2d.add_trace(trajectory_trace, row=panel_row, col=panel_col)

    # Projected weight vectors (first two components) as labelled reference points
    weight_trace = go.Scatter(
        x=weight_pca_coords[:, 0],
        y=weight_pca_coords[:, 1],
        mode='markers+text',
        marker=dict(
            size=8,
            color='red',
            symbol='diamond',
            opacity=0.7,
            line=dict(color='darkred', width=1)
        ),
        text=weight_labels,
        textposition='top center',
        textfont=dict(size=9, color='darkred'),
        name='Weight Points',
        showlegend=is_first
    )
    fig_2d.add_trace(weight_trace, row=panel_row, col=panel_col)

fig_2d.update_layout(
    title='2D Trajectories for All Sequences (PC1 vs PC2)',
    width=1600,
    height=800
)

# Axis titles for every panel
for panel in range(n_panels):
    zero_row, zero_col = divmod(panel, grid_cols)
    fig_2d.update_xaxes(title_text='PC1', row=zero_row + 1, col=zero_col + 1)
    fig_2d.update_yaxes(title_text='PC2', row=zero_row + 1, col=zero_col + 1)

fig_2d.show()

In [84]:
# Plot how each PCA component varies with token sequence position, averaged across all sequences
import numpy as np

# Average every PCA component at each token position (counted from the start of
# the sequence) over all sequences that are long enough to reach that position.

# Longest sequence decides how many positions exist
max_seq_len = max(len(coords) for coords in all_coords)
# Take the component count from the data instead of hard-coding it
n_components = all_coords[0].shape[1]

# Running sums and per-position sequence counts
component_sums = np.zeros((max_seq_len, n_components))
position_counts = np.zeros(max_seq_len)

# Each sequence contributes to positions 0 .. len-1 (left-aligned)
for coords in all_coords:
    seq_len = len(coords)
    component_sums[:seq_len] += coords
    position_counts[:seq_len] += 1

# Divide only where at least one sequence contributed
component_averages = np.zeros((max_seq_len, n_components))
valid_positions = position_counts > 0
component_averages[valid_positions] = component_sums[valid_positions] / position_counts[valid_positions, np.newaxis]

fig = go.Figure()

# Positions that have data are the same for every component, so compute them once
valid_pos_indices = np.where(valid_positions)[0]

# One line per component -- all of them, matching the figure title
for i in range(n_components):
    fig.add_trace(
        go.Scatter(
            x=valid_pos_indices,
            y=component_averages[valid_positions, i],
            mode='lines+markers',
            name=f'PC{i+1}',
            line=dict(width=2),
            marker=dict(size=4)
        )
    )

fig.update_layout(
    title='All PCA Components vs Token Position (Averaged Across All Sequences)',
    xaxis_title='Token Position',
    yaxis_title='Average PCA Component Value',
    width=1000,
    height=600,
    showlegend=True
)

fig.show()


In [10]:
# Align trajectories from the end and compute average
import numpy as np
import plotly.graph_objects as go

# Length of the longest trajectory; every other one is padded up to it
max_seq_len = max(len(coords) for coords in all_coords)

# Right-align: shorter trajectories get NaN rows prepended so that the final
# token of every sequence lands on the last row. Full-length ones are used as-is.
aligned_trajectories = [
    np.vstack([np.full((max_seq_len - len(traj), traj.shape[1]), np.nan), traj])
    if len(traj) < max_seq_len
    else traj
    for traj in all_coords
]

# Shape: [n_sequences, max_seq_len, n_components]
aligned_array = np.stack(aligned_trajectories)

# Per-position statistics over sequences; NaN padding is ignored
mean_trajectory = np.nanmean(aligned_array, axis=0)
std_trajectory = np.nanstd(aligned_array, axis=0)

# Offsets relative to the end: 0 is the last token, negatives are steps before it
positions_from_end = np.arange(-max_seq_len + 1, 1)

print(f"Aligned trajectories shape: {aligned_array.shape}")
print(f"Mean trajectory shape: {mean_trajectory.shape}")
print(f"Number of valid sequences at each position:")
# A sequence counts at a position when its first component there is not NaN
valid_counts = (~np.isnan(aligned_array[:, :, 0])).sum(axis=0)
last_index = len(positions_from_end) - 1
for i, (pos, count) in enumerate(zip(positions_from_end, valid_counts)):
    # Report every 50th position plus the final one
    if i % 50 != 0 and i != last_index:
        continue
    print(f"  Position {pos:4d} from end: {count:2.0f} sequences")

Aligned trajectories shape: (30, 4341, 9)
Mean trajectory shape: (4341, 9)
Number of valid sequences at each position:
  Position -4340 from end:  1 sequences
  Position -4290 from end:  1 sequences
  Position -4240 from end:  1 sequences
  Position -4190 from end:  1 sequences
  Position -4140 from end:  1 sequences
  Position -4090 from end:  1 sequences
  Position -4040 from end:  1 sequences
  Position -3990 from end:  1 sequences
  Position -3940 from end:  1 sequences
  Position -3890 from end:  1 sequences
  Position -3840 from end:  1 sequences
  Position -3790 from end:  1 sequences
  Position -3740 from end:  1 sequences
  Position -3690 from end:  1 sequences
  Position -3640 from end:  1 sequences
  Position -3590 from end:  1 sequences
  Position -3540 from end:  1 sequences
  Position -3490 from end:  1 sequences
  Position -3440 from end:  1 sequences
  Position -3390 from end:  1 sequences
  Position -3340 from end:  1 sequences
  Position -3290 from end:  1 sequences
 

In [11]:
# Visualize the end-aligned average trajectory in 3D together with every
# individual trajectory and the projected weight points.
# (Only the mean is drawn; no std / confidence band is plotted in this figure.)
fig = go.Figure()

# Individual trajectories (semi-transparent); only the first three get a legend entry
for i, coords in enumerate(all_coords):
    fig.add_trace(
        go.Scatter3d(
            x=coords[:, 0],
            y=coords[:, 1],
            z=coords[:, 2],
            mode='lines',
            line=dict(color='rgba(150,150,150,0.2)', width=1),
            name=f'Seq {i}' if i < 3 else None,
            showlegend=i < 3,
            hovertemplate=f'Sequence {i}<br>PC1: %{{x:.2f}}<br>PC2: %{{y:.2f}}<br>PC3: %{{z:.2f}}<extra></extra>'
        )
    )

# Mean trajectory (bold); markers are coloured by their offset from the end
fig.add_trace(
    go.Scatter3d(
        x=mean_trajectory[:, 0],
        y=mean_trajectory[:, 1],
        z=mean_trajectory[:, 2],
        mode='lines+markers',
        line=dict(color='blue', width=6),
        marker=dict(
            size=4,
            color=positions_from_end,
            colorscale='Viridis',
            showscale=True,
            colorbar=dict(title="Steps from<br>End", x=1.05)
        ),
        name='Mean Trajectory',
        showlegend=True,
        hovertemplate='Mean<br>PC1: %{x:.2f}<br>PC2: %{y:.2f}<br>PC3: %{z:.2f}<br>Position: %{marker.color}<extra></extra>'
    )
)

# Projected weight vectors as labelled reference points
fig.add_trace(
    go.Scatter3d(
        x=weight_pca_coords[:, 0],
        y=weight_pca_coords[:, 1],
        z=weight_pca_coords[:, 2],
        mode='markers+text',
        marker=dict(
            size=6,
            color='red',
            symbol='diamond',
            opacity=0.8,
            line=dict(color='darkred', width=2)
        ),
        text=[str(j) for j in range(len(weight_pca_coords))],
        textposition='top center',
        textfont=dict(size=10, color='darkred'),
        name='Weight Points',
        showlegend=True
    )
)

fig.update_layout(
    title='Average Trajectory Aligned from End (3D)<br><sub>Individual trajectories in gray, mean in blue</sub>',
    scene=dict(
        xaxis_title='PC1',
        yaxis_title='PC2',
        zaxis_title='PC3',
        camera=dict(
            eye=dict(x=1.5, y=1.5, z=1.5)
        )
    ),
    width=1200,
    height=900,
    showlegend=True
)

fig.show()

In [13]:
# Flat (PC1 vs PC2) view of the end-aligned mean trajectory, drawn over every
# individual trajectory and the projected weight points.
fig_2d = go.Figure()

# Individual trajectories in faint gray; the first three appear in the legend
for seq_id, seq_coords in enumerate(all_coords):
    in_legend = seq_id < 3
    gray_trace = go.Scatter(
        x=seq_coords[:, 0],
        y=seq_coords[:, 1],
        mode='lines',
        line=dict(color='rgba(150,150,150,0.3)', width=1),
        name=f'Seq {seq_id}' if in_legend else None,
        showlegend=in_legend,
        hovertemplate=f'Sequence {seq_id}<br>PC1: %{{x:.2f}}<br>PC2: %{{y:.2f}}<extra></extra>'
    )
    fig_2d.add_trace(gray_trace)

# Mean trajectory; marker colour encodes the offset from the end of the sequence
mean_marker = dict(
    size=6,
    color=positions_from_end,
    colorscale='Viridis',
    showscale=True,
    colorbar=dict(title="Steps from End", x=1.05),
    line=dict(color='darkblue', width=1)
)
fig_2d.add_trace(
    go.Scatter(
        x=mean_trajectory[:, 0],
        y=mean_trajectory[:, 1],
        mode='lines+markers',
        line=dict(color='blue', width=4),
        marker=mean_marker,
        name='Mean Trajectory',
        showlegend=True,
        hovertemplate='Mean<br>PC1: %{x:.2f}<br>PC2: %{y:.2f}<br>Steps from end: %{marker.color}<extra></extra>'
    )
)

# Projected weight vectors, labelled by row index
weight_marker = dict(
    size=10,
    color='red',
    symbol='diamond',
    opacity=0.8,
    line=dict(color='darkred', width=2)
)
fig_2d.add_trace(
    go.Scatter(
        x=weight_pca_coords[:, 0],
        y=weight_pca_coords[:, 1],
        mode='markers+text',
        marker=weight_marker,
        text=[str(j) for j in range(len(weight_pca_coords))],
        textposition='top center',
        textfont=dict(size=11, color='darkred', family='Arial Black'),
        name='Weight Points',
        showlegend=True
    )
)

fig_2d.update_layout(
    title='Average Trajectory Aligned from End (2D: PC1 vs PC2)<br><sub>Individual trajectories in gray, mean in blue, color shows steps from end</sub>',
    xaxis_title='PC1',
    yaxis_title='PC2',
    width=1000,
    height=800,
    showlegend=True,
    hovermode='closest'
)

fig_2d.show()

In [14]:
# One panel per PCA component: value of that component against the number of
# steps remaining until the end of the sequence (all sequences right-aligned).
grid_side = 3
n_panels = grid_side * grid_side

fig_components = make_subplots(
    rows=grid_side, cols=grid_side,
    subplot_titles=[f'PC{k+1} vs Steps from End' for k in range(n_panels)]
)

# Rows of weight_pca_coords drawn as dashed horizontal reference lines
reference_weight_ids = [0, 5, 10, 15, 19]

for comp_idx in range(n_panels):
    zero_row, zero_col = divmod(comp_idx, grid_side)
    panel_row, panel_col = zero_row + 1, zero_col + 1
    is_first_panel = comp_idx == 0

    # Every individual sequence, x-axis measured back from its own last token
    for seq_id, seq_coords in enumerate(all_coords):
        steps_from_end = np.arange(1 - len(seq_coords), 1)
        seq_trace = go.Scatter(
            x=steps_from_end,
            y=seq_coords[:, comp_idx],
            mode='lines',
            line=dict(color='rgba(150,150,150,0.3)', width=1),
            showlegend=False,
            hovertemplate=f'Seq {seq_id}<br>Steps: %{{x}}<br>PC{comp_idx+1}: %{{y:.2f}}<extra></extra>'
        )
        fig_components.add_trace(seq_trace, row=panel_row, col=panel_col)

    # Mean over sequences; only the first panel contributes a legend entry
    mean_trace = go.Scatter(
        x=positions_from_end,
        y=mean_trajectory[:, comp_idx],
        mode='lines+markers',
        line=dict(color='blue', width=3),
        marker=dict(size=3, color='blue'),
        name=f'Mean PC{comp_idx+1}' if is_first_panel else None,
        showlegend=is_first_panel,
        hovertemplate=f'Mean<br>Steps: %{{x}}<br>PC{comp_idx+1}: %{{y:.2f}}<extra></extra>'
    )
    fig_components.add_trace(mean_trace, row=panel_row, col=panel_col)

    # Reference levels from a handful of weight points (annotated in the first panel only)
    for weight_idx in reference_weight_ids:
        fig_components.add_hline(
            y=weight_pca_coords[weight_idx, comp_idx],
            line_dash="dash",
            line_color="red",
            opacity=0.3,
            annotation_text=f"W{weight_idx}" if is_first_panel else "",
            annotation_position="right",
            row=panel_row, col=panel_col
        )

fig_components.update_xaxes(title_text="Steps from End")
fig_components.update_yaxes(title_text="Component Value")

fig_components.update_layout(
    title='PCA Components vs Steps from End<br><sub>Each component shown separately, aligned from end</sub>',
    width=1400,
    height=1000,
    showlegend=True
)

fig_components.show()

In [None]:
# Load the Qwen3-4B.json file to get the actual text/tokens for each sequence
import json
from nnsight import LanguageModel

# Load the rollout records; each record's 'response' text is read below
with open('/workspace/llm-progress-monitor/rollouts/Qwen3-4B.json', 'r') as f:
    data = json.load(f)

# Load the model; in this cell only its tokenizer is used
model_name = 'Qwen/Qwen3-4B'
device = 'cuda'
model = LanguageModel(model_name, device_map=device, dtype=torch.bfloat16)

THINK_END_TAG = '</think>'
N_ANALYZED_SEQUENCES = 30  # same count as the activation files loaded earlier in the notebook

# One dict per sequence describing where (if anywhere) the closing think tag sits
think_end_positions = []

for i in range(N_ANALYZED_SEQUENCES):
    text = data[i]['response']

    # encode() without return_tensors already returns a flat list of token ids.
    # Going through a tensor and .squeeze() would collapse a one-token result to
    # a scalar, and len() on that scalar fails.
    tokens = model.tokenizer.encode(text)

    # Character offset of the tag, or -1 when the response has none
    think_end_idx = text.find(THINK_END_TAG)

    if think_end_idx != -1:
        # Tokenize the prefix that ends with the tag to get a token position.
        # NOTE(review): tokenizing a prefix is not guaranteed to split exactly like
        # the full text near the cut, and this position is relative to the response
        # text only -- confirm it lines up with the saved activations.
        text_up_to_think = text[:think_end_idx + len(THINK_END_TAG)]
        tokens_up_to_think = model.tokenizer.encode(text_up_to_think)

        # 0-indexed position of the last token of the prefix
        think_position = len(tokens_up_to_think) - 1

        think_end_positions.append({
            'seq_idx': i,
            'think_position': think_position,
            'total_tokens': len(tokens),
            'char_position': think_end_idx,
            'has_think': True
        })
    else:
        think_end_positions.append({
            'seq_idx': i,
            'think_position': None,
            'total_tokens': len(tokens),
            'char_position': None,
            'has_think': False
        })

# Display results
print("</think> Token Positions:")
print("="*70)
for info in think_end_positions:
    if info['has_think']:
        print(f"Seq {info['seq_idx']:2d}: </think> at token {info['think_position']:4d} / {info['total_tokens']:4d} ({info['think_position']/info['total_tokens']*100:.1f}% through)")
    else:
        print(f"Seq {info['seq_idx']:2d}: No </think> tag found (total tokens: {info['total_tokens']})")

sequences_with_think = [p for p in think_end_positions if p['has_think']]

print("\n" + "="*70)
print(f"Summary: {len(sequences_with_think)} sequences have </think> tag")
if sequences_with_think:
    print(f"Average position: {np.mean([p['think_position'] for p in sequences_with_think]):.1f} tokens")
    print(f"Average percentage through: {np.mean([p['think_position']/p['total_tokens']*100 for p in sequences_with_think]):.1f}%")
else:
    # Averages are undefined without any tagged sequence; avoid np.mean([]) -> nan + warning
    print("Average position: n/a (no </think> tag found)")

In [None]:
# Visualize the first (up to) ten trajectories in 3D and mark, on each one, the
# token position recorded for its closing </think> tag.
fig = go.Figure()

# Individual trajectories; hover text carries the token index
for i in range(min(10, len(all_coords))):
    coords = all_coords[i]

    fig.add_trace(
        go.Scatter3d(
            x=coords[:, 0],
            y=coords[:, 1],
            z=coords[:, 2],
            mode='lines',
            line=dict(color='rgba(150,150,150,0.3)', width=2),
            name=f'Seq {i}',
            showlegend=i < 3,
            hovertemplate=f'Sequence {i}<br>Token: %{{text}}<br>PC1: %{{x:.2f}}<br>PC2: %{{y:.2f}}<br>PC3: %{{z:.2f}}<extra></extra>',
            text=[f"{j}" for j in range(len(coords))]
        )
    )

    # Marker at the </think> position, when one was found for this sequence
    if i < len(think_end_positions) and think_end_positions[i]['has_think']:
        think_pos = think_end_positions[i]['think_position']
        # Skip positions that fall outside the stored trajectory
        if think_pos < len(coords):
            fig.add_trace(
                go.Scatter3d(
                    x=[coords[think_pos, 0]],
                    y=[coords[think_pos, 1]],
                    z=[coords[think_pos, 2]],
                    mode='markers',
                    marker=dict(
                        size=10,
                        color='orange',
                        symbol='x',
                        line=dict(color='darkorange', width=2)
                    ),
                    name=f'</think> Seq {i}' if i < 3 else None,
                    showlegend=i < 3,
                    hovertemplate=f'Sequence {i}<br></think> at token {think_pos}<br>PC1: %{{x:.2f}}<br>PC2: %{{y:.2f}}<br>PC3: %{{z:.2f}}<extra></extra>'
                )
            )

# End-aligned mean trajectory
fig.add_trace(
    go.Scatter3d(
        x=mean_trajectory[:, 0],
        y=mean_trajectory[:, 1],
        z=mean_trajectory[:, 2],
        mode='lines',
        line=dict(color='blue', width=4),
        name='Mean Trajectory',
        showlegend=True
    )
)

# Projected weight vectors as labelled reference points
fig.add_trace(
    go.Scatter3d(
        x=weight_pca_coords[:, 0],
        y=weight_pca_coords[:, 1],
        z=weight_pca_coords[:, 2],
        mode='markers+text',
        marker=dict(
            size=5,
            color='red',
            symbol='diamond',
            opacity=0.7
        ),
        text=[str(j) for j in range(len(weight_pca_coords))],
        textposition='top center',
        textfont=dict(size=8, color='darkred'),
        name='Weight Points',
        showlegend=True
    )
)

fig.update_layout(
    title='3D Trajectories with </think> Markers (Orange X)<br><sub>Shows where the thinking phase ends in each sequence</sub>',
    scene=dict(
        xaxis_title='PC1',
        yaxis_title='PC2',
        zaxis_title='PC3'
    ),
    width=1200,
    height=900,
    showlegend=True
)

fig.show()

In [None]:
# Flat (PC1 vs PC2) view of the first (up to) ten trajectories, each with an
# orange X where its closing </think> tag was located.
fig_2d = go.Figure()

n_shown = min(10, len(all_coords))
for seq_id in range(n_shown):
    seq_coords = all_coords[seq_id]
    in_legend = seq_id < 3

    # The trajectory itself; hover text carries the token index
    fig_2d.add_trace(
        go.Scatter(
            x=seq_coords[:, 0],
            y=seq_coords[:, 1],
            mode='lines',
            line=dict(color='rgba(150,150,150,0.4)', width=2),
            name=f'Seq {seq_id}',
            showlegend=in_legend,
            hovertemplate=f'Sequence {seq_id}<br>Token: %{{text}}<br>PC1: %{{x:.2f}}<br>PC2: %{{y:.2f}}<extra></extra>',
            text=[f"{j}" for j in range(len(seq_coords))]
        )
    )

    # Nothing more to draw unless a </think> position exists and lies on the trajectory
    if seq_id >= len(think_end_positions):
        continue
    think_info = think_end_positions[seq_id]
    if not think_info['has_think']:
        continue
    think_pos = think_info['think_position']
    if think_pos >= len(seq_coords):
        continue

    percent_through = think_pos / think_info["total_tokens"] * 100
    fig_2d.add_trace(
        go.Scatter(
            x=[seq_coords[think_pos, 0]],
            y=[seq_coords[think_pos, 1]],
            mode='markers',
            marker=dict(
                size=15,
                color='orange',
                symbol='x',
                line=dict(color='darkorange', width=3)
            ),
            name=f'</think> Seq {seq_id}' if in_legend else None,
            showlegend=in_legend,
            hovertemplate=f'Sequence {seq_id}<br></think> at token {think_pos}<br>({percent_through:.1f}% through)<br>PC1: %{{x:.2f}}<br>PC2: %{{y:.2f}}<extra></extra>'
        )
    )

# End-aligned mean trajectory
fig_2d.add_trace(
    go.Scatter(
        x=mean_trajectory[:, 0],
        y=mean_trajectory[:, 1],
        mode='lines+markers',
        line=dict(color='blue', width=4),
        marker=dict(size=4, color='blue'),
        name='Mean Trajectory',
        showlegend=True
    )
)

# Projected weight vectors, labelled by row index
weight_marker = dict(
    size=10,
    color='red',
    symbol='diamond',
    opacity=0.8,
    line=dict(color='darkred', width=2)
)
fig_2d.add_trace(
    go.Scatter(
        x=weight_pca_coords[:, 0],
        y=weight_pca_coords[:, 1],
        mode='markers+text',
        marker=weight_marker,
        text=[str(j) for j in range(len(weight_pca_coords))],
        textposition='top center',
        textfont=dict(size=11, color='darkred', family='Arial Black'),
        name='Weight Points',
        showlegend=True
    )
)

fig_2d.update_layout(
    title='2D Trajectories with </think> Markers (Orange X)<br><sub>Shows where the thinking phase ends for each sequence</sub>',
    xaxis_title='PC1',
    yaxis_title='PC2',
    width=1200,
    height=900,
    showlegend=True,
    hovermode='closest'
)

fig_2d.show()

In [None]:
# Summarise where the closing </think> tag falls: in token terms (from the start
# and from the end) and in PCA coordinates, then find the closest weight point.

think_coords_list = []
think_positions_from_start = []
think_positions_from_end = []

for seq_id, info in enumerate(think_end_positions[:len(all_coords)]):
    if not info['has_think']:
        continue
    think_pos = info['think_position']
    # Ignore tag positions that lie beyond the stored trajectory
    if think_pos >= len(all_coords[seq_id]):
        continue

    # Smoothed PCA coordinates at the tag
    think_coords_list.append(all_coords[seq_id][think_pos])

    # Token offsets from either end
    think_positions_from_start.append(think_pos)
    think_positions_from_end.append(info['total_tokens'] - think_pos)

if not think_coords_list:
    print("No </think> tags found in sequences")
else:
    think_coords_array = np.array(think_coords_list)
    mean_think_coord = np.mean(think_coords_array, axis=0)

    print("</think> Position Analysis:")
    print("="*70)
    print(f"Number of sequences with </think>: {len(think_coords_list)}")
    print(f"\nAverage </think> position:")
    print(f"  From start: {np.mean(think_positions_from_start):.1f} tokens (±{np.std(think_positions_from_start):.1f})")
    print(f"  From end: {np.mean(think_positions_from_end):.1f} tokens (±{np.std(think_positions_from_end):.1f})")
    print(f"\nAverage PCA coordinates at </think>:")
    n_reported = min(3, len(mean_think_coord))
    for pc in range(n_reported):
        print(f"  PC{pc+1}: {mean_think_coord[pc]:.3f} (±{np.std(think_coords_array[:, pc]):.3f})")

    # Euclidean distance, in the first three components, from each weight point
    # to the mean </think> coordinate
    offsets = weight_pca_coords[:, :3] - mean_think_coord[:3]
    distances_to_weights = np.linalg.norm(offsets, axis=1)
    nearest_weight_idx = np.argmin(distances_to_weights)

    print(f"\nNearest weight point to mean </think> position: Weight {nearest_weight_idx}")
    print(f"  Distance: {distances_to_weights[nearest_weight_idx]:.3f}")
    # The x5 scaling presumes weight points are spaced at 5% intervals -- TODO confirm
    print(f"  This corresponds to ~{nearest_weight_idx * 5}% through the response")