# DAC Continuous Codebook Embeddings Visualization

This notebook extracts and visualizes **continuous codebook embeddings** from DAC.

## Approach:
- Extract discrete codes (indices) from DAC
- Look up continuous embeddings from codebooks
- Pool embeddings across time and codebooks
- Apply PCA/t-SNE like the main dashboard
- Compare clustering quality with Wav2Vec2/Whisper

In [None]:
import sys
import numpy as np
import torch
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

# Import our utilities
from dac_utils import DACProcessor, SpeechCommandsLoader, extract_dac_embeddings_batch

# Reaching this line means every import above resolved, including the
# project's own `dac_utils` helpers.
print("Imports successful!")

## Step 1: Initialize DAC Model

In [None]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f"Using device: {device}")

# Build the DAC wrapper (model_type "16khz") on the selected device.
dac_processor = DACProcessor(device=device, model_type="16khz")

## Step 2: Test Embedding Extraction on Single Sample

In [None]:
# Fetch a single recording of "zero" to sanity-check the extraction pipeline.
loader = SpeechCommandsLoader()
file_paths, labels = loader.load_word_samples(['zero'], samples_per_word=1)

if len(file_paths) == 0:
    print("No audio files found!")
else:
    test_file = file_paths[0]
    print(f"Testing with: {test_file}\n")

    # Audio -> discrete code indices.
    # Shape noted by the original author as [1, n_codebooks, time]; the
    # print below shows the actual value.
    encoded = dac_processor.encode_audio(test_file)
    codes = encoded['codes']
    print(f"Codes shape: {codes.shape}")

    # Code indices -> continuous codebook vectors.
    embeddings = dac_processor.get_codebook_embeddings(codes)
    print(f"Codebook embeddings shape: {embeddings.shape}  # [batch, n_codebooks, time, codebook_dim]")

    # Global statistics over every element of the embedding tensor.
    print("\nEmbedding Statistics:")
    for stat_name, stat_fn in (
        ("Min", embeddings.min),
        ("Max", embeddings.max),
        ("Mean", embeddings.mean),
        ("Std", embeddings.std),
    ):
        print(f"  {stat_name}: {stat_fn().item():.4f}")

    print("\nPooling Methods:")

    # Average over dims 1 and 2 (codebooks and time, assuming the layout
    # printed above), leaving one vector per batch item.
    mean_pooled = embeddings.mean(dim=[1, 2])
    print(f"  Mean pooled shape: {mean_pooled.shape}")
    # Optional debug output:
    # print(f"  Mean pooled vector: {mean_pooled.squeeze().cpu().numpy()}")

## Step 3: Load Dataset - 5 Words, 10 Samples Each

In [None]:
# Vocabulary to visualise and how many recordings to request per word.
words = ['zero', 'one', 'two', 'yes', 'no']
samples_per_word = 10

# Resolve audio paths and their word labels through the shared loader.
file_paths, file_labels = loader.load_word_samples(
    words,
    samples_per_word=samples_per_word,
)

print(f"Total samples: {len(file_paths)}")
print("\nLabel distribution:")
for word in words:
    # Report how many samples were actually returned for this word.
    count = file_labels.count(word)
    print(f"  {word}: {count} samples")

## Step 4: Extract Embeddings for All Samples

In [None]:
# Run every file through DAC, requesting mean-pooled codebook embeddings.
# The helper returns the embeddings together with the labels that go with
# them; downstream cells index plots by `valid_labels`, not `file_labels`.
extraction_options = {
    'pooling_method': 'mean',
    'use_codebook_embeddings': True,
}
embeddings, valid_labels = extract_dac_embeddings_batch(
    file_paths, file_labels, dac_processor, **extraction_options
)

print(f"\nFinal embeddings shape: {embeddings.shape}")
print(f"Embedding dimension: {embeddings.shape[1]}")

## Step 5: PCA Visualization (2D and 3D)

In [None]:
# PCA - 2D
# Project the pooled embeddings onto their first two principal components.
pca_2d = PCA(n_components=2, random_state=42)
pca_2d_result = pca_2d.fit_transform(embeddings)
variance_2d = pca_2d.explained_variance_ratio_.sum()

print(f"PCA 2D variance explained: {variance_2d:.2%}")

# Create color map (shared by every later plotting cell).
# The qualitative palette is a fixed-length list, so wrap the index: a longer
# `words` list then reuses colors instead of raising IndexError.
palette = px.colors.qualitative.Plotly
color_map = {word: palette[i % len(palette)] for i, word in enumerate(words)}

# 2D Plot: one trace per word so each word gets its own legend entry.
fig_2d = go.Figure()

for word in words:
    # Boolean row selector for the samples labelled with this word.
    mask = np.array([label == word for label in valid_labels])
    fig_2d.add_trace(go.Scatter(
        x=pca_2d_result[mask, 0],
        y=pca_2d_result[mask, 1],
        mode='markers',
        name=word,
        marker=dict(size=12, color=color_map[word], opacity=0.7, line=dict(width=0.5, color='white'))
    ))

fig_2d.update_layout(
    title=f'PCA 2D: DAC Codebook Embeddings (Variance: {variance_2d:.1%})',
    xaxis_title='PC 1',
    yaxis_title='PC 2',
    width=900,
    height=700,
    # Lock x to y so one unit is the same length on both axes.
    xaxis=dict(scaleanchor='y', scaleratio=1),
    template='plotly_white'
)

fig_2d.write_html('dac_embeddings_pca_2d.html')
fig_2d.show()

print("Saved: dac_embeddings_pca_2d.html")

In [None]:
# PCA - 3D
# Same projection as the 2D cell, keeping three principal components.
pca_3d = PCA(random_state=42, n_components=3)
pca_3d_result = pca_3d.fit_transform(embeddings)
variance_3d = pca_3d.explained_variance_ratio_.sum()

print(f"PCA 3D variance explained: {variance_3d:.2%}")

# 3D scatter with one trace per word.
fig_3d = go.Figure()

for word in words:
    # Select the rows whose label matches this word.
    mask = np.array([label == word for label in valid_labels])
    word_points = pca_3d_result[mask]
    marker_style = {
        'size': 6,
        'color': color_map[word],
        'opacity': 0.7,
        'line': {'width': 0},
    }
    fig_3d.add_trace(
        go.Scatter3d(
            x=word_points[:, 0],
            y=word_points[:, 1],
            z=word_points[:, 2],
            mode='markers',
            name=word,
            marker=marker_style,
        )
    )

scene_3d = {
    'xaxis_title': 'PC 1',
    'yaxis_title': 'PC 2',
    'zaxis_title': 'PC 3',
    'aspectmode': 'cube',  # draw the three axes at equal length
    'camera': {'eye': {'x': 1.5, 'y': 1.5, 'z': 1.5}},
}
fig_3d.update_layout(
    title=f'PCA 3D: DAC Codebook Embeddings (Variance: {variance_3d:.1%})',
    scene=scene_3d,
    width=900,
    height=700,
    template='plotly_white',
)

fig_3d.write_html('dac_embeddings_pca_3d.html')
fig_3d.show()

print("Saved: dac_embeddings_pca_3d.html")

## Step 6: t-SNE Visualization (2D and 3D)

In [None]:
# t-SNE - 2D
# Use perplexity 30 unless the dataset is smaller, in which case stay just
# below the sample count. Later cells reuse `perplexity`.
perplexity = min(30, len(embeddings) - 1)
tsne_2d = TSNE(
    n_components=2,
    perplexity=perplexity,
    metric='cosine',
    random_state=42,
)
tsne_2d_result = tsne_2d.fit_transform(embeddings)

print(f"t-SNE 2D completed with perplexity={perplexity}")

# 2D scatter with one trace per word.
fig_tsne_2d = go.Figure()

for word in words:
    # Select the rows whose label matches this word.
    mask = np.array([label == word for label in valid_labels])
    word_points = tsne_2d_result[mask]
    marker_style = {
        'size': 12,
        'color': color_map[word],
        'opacity': 0.7,
        'line': {'width': 0.5, 'color': 'white'},
    }
    fig_tsne_2d.add_trace(
        go.Scatter(
            x=word_points[:, 0],
            y=word_points[:, 1],
            mode='markers',
            name=word,
            marker=marker_style,
        )
    )

fig_tsne_2d.update_layout(
    title=f't-SNE 2D: DAC Codebook Embeddings (Perplexity={perplexity})',
    xaxis_title='t-SNE 1',
    yaxis_title='t-SNE 2',
    width=900,
    height=700,
    # Equal aspect ratio (critical for t-SNE): lock x to y.
    xaxis={'scaleanchor': 'y', 'scaleratio': 1},
    template='plotly_white',
)

fig_tsne_2d.write_html('dac_embeddings_tsne_2d.html')
fig_tsne_2d.show()

print("Saved: dac_embeddings_tsne_2d.html")

In [None]:
# t-SNE - 3D
# Reuses the `perplexity` chosen in the 2D t-SNE cell.
tsne_3d = TSNE(
    n_components=3,
    perplexity=perplexity,
    metric='cosine',
    random_state=42,
)
tsne_3d_result = tsne_3d.fit_transform(embeddings)

print(f"t-SNE 3D completed with perplexity={perplexity}")

# 3D scatter with one trace per word.
fig_tsne_3d = go.Figure()

for word in words:
    # Select the rows whose label matches this word.
    mask = np.array([label == word for label in valid_labels])
    word_points = tsne_3d_result[mask]
    marker_style = {
        'size': 6,
        'color': color_map[word],
        'opacity': 0.7,
        'line': {'width': 0},
    }
    fig_tsne_3d.add_trace(
        go.Scatter3d(
            x=word_points[:, 0],
            y=word_points[:, 1],
            z=word_points[:, 2],
            mode='markers',
            name=word,
            marker=marker_style,
        )
    )

scene_tsne_3d = {
    'xaxis_title': 't-SNE 1',
    'yaxis_title': 't-SNE 2',
    'zaxis_title': 't-SNE 3',
    'aspectmode': 'cube',  # draw the three axes at equal length
    'camera': {'eye': {'x': 1.5, 'y': 1.5, 'z': 1.5}},
}
fig_tsne_3d.update_layout(
    title=f't-SNE 3D: DAC Codebook Embeddings (Perplexity={perplexity})',
    scene=scene_tsne_3d,
    width=900,
    height=700,
    template='plotly_white',
)

fig_tsne_3d.write_html('dac_embeddings_tsne_3d.html')
fig_tsne_3d.show()

print("Saved: dac_embeddings_tsne_3d.html")

## Step 7: Combined Dashboard-Style Visualization

In [None]:
# Create a 2x2 grid like the main dashboard:
#   row 1: PCA 2D   | PCA 3D
#   row 2: t-SNE 2D | t-SNE 3D
# Relies on results computed by the PCA / t-SNE cells above
# (pca_*_result, tsne_*_result, variance_*, perplexity, color_map).
from plotly.subplots import make_subplots

fig = make_subplots(
    rows=2, cols=2,
    specs=[
        [{'type': 'scatter'}, {'type': 'scatter3d'}],
        [{'type': 'scatter'}, {'type': 'scatter3d'}]
    ],
    subplot_titles=(
        f'PCA 2D (Var: {variance_2d:.1%})',
        f'PCA 3D (Var: {variance_3d:.1%})',
        f't-SNE 2D (Perp: {perplexity})',
        f't-SNE 3D (Perp: {perplexity})'
    ),
    vertical_spacing=0.12,
    horizontal_spacing=0.1
)

# One boolean row selector per word, computed once and shared by all panels.
word_masks = {
    word: np.array([label == word for label in valid_labels])
    for word in words
}

# (projected coordinates, grid row, grid column, is_3d).
# The list order fixes the order in which traces are added to the figure.
panels = [
    (pca_2d_result, 1, 1, False),
    (pca_3d_result, 1, 2, True),
    (tsne_2d_result, 2, 1, False),
    (tsne_3d_result, 2, 2, True),
]

for panel_idx, (coords, row, col, is_3d) in enumerate(panels):
    for word in words:
        mask = word_masks[word]
        # All traces for a word share a legendgroup so one legend click
        # toggles that word in every panel; only the first panel's traces
        # contribute legend entries, avoiding duplicates.
        shared = dict(
            mode='markers',
            name=word,
            showlegend=(panel_idx == 0),
            legendgroup=word,
        )
        if is_3d:
            trace = go.Scatter3d(
                x=coords[mask, 0],
                y=coords[mask, 1],
                z=coords[mask, 2],
                marker=dict(size=5, color=color_map[word], opacity=0.7),
                **shared
            )
        else:
            trace = go.Scatter(
                x=coords[mask, 0],
                y=coords[mask, 1],
                marker=dict(size=8, color=color_map[word], opacity=0.7),
                **shared
            )
        fig.add_trace(trace, row=row, col=col)

# Update layout
fig.update_layout(
    title_text='DAC Codebook Embeddings - Complete Visualization',
    title_x=0.5,
    title_font_size=20,
    width=1400,
    height=1000,
    showlegend=True,
    template='plotly_white'
)

# Update axes for equal aspect ratio.
# Cartesian axes are numbered independently of the 3D scenes, so the two 2D
# panels own (x, y) and (x2, y2). The t-SNE panel must therefore anchor to
# 'y2'; anchoring to 'y3' points at an axis that does not exist in this
# figure, so the constraint would not take effect.
fig.update_xaxes(title_text='PC 1', row=1, col=1, scaleanchor='y', scaleratio=1)
fig.update_yaxes(title_text='PC 2', row=1, col=1)
fig.update_xaxes(title_text='t-SNE 1', row=2, col=1, scaleanchor='y2', scaleratio=1)
fig.update_yaxes(title_text='t-SNE 2', row=2, col=1)

# Update 3D scenes
fig.update_scenes(aspectmode='cube', row=1, col=2)
fig.update_scenes(aspectmode='cube', row=2, col=2)

fig.write_html('dac_embeddings_complete.html')
fig.show()

print("Saved: dac_embeddings_complete.html")

## Step 8: Analyze Clustering Quality

In [None]:
from sklearn.metrics import silhouette_score, davies_bouldin_score


def _cluster_scores(points, cluster_ids):
    """Return ``(silhouette, davies_bouldin)`` for *points* grouped by *cluster_ids*."""
    return (
        silhouette_score(points, cluster_ids),
        davies_bouldin_score(points, cluster_ids),
    )


# Convert labels to numeric
label_to_idx = {word: i for i, word in enumerate(words)}
numeric_labels = np.array([label_to_idx[label] for label in valid_labels])

print("=" * 60)
print("CLUSTERING QUALITY METRICS")
print("=" * 60)

# Both scores are only defined with at least two classes and more samples
# than classes. The same precondition applies to every representation below,
# so check it once and skip all scores together when it fails rather than
# guarding only the original embeddings and crashing on the projections.
n_classes = len(np.unique(numeric_labels))
n_samples = len(embeddings)

if 1 < n_classes < n_samples:
    # Original embeddings
    sil_orig, db_orig = _cluster_scores(embeddings, numeric_labels)
    print(f"\nOriginal Embeddings ({embeddings.shape[1]}D):")
    print(f"  Silhouette Score: {sil_orig:.4f}  (higher is better, range: -1 to 1)")
    print(f"  Davies-Bouldin Score: {db_orig:.4f}  (lower is better, >0)")

    # PCA 2D
    sil_pca2d, db_pca2d = _cluster_scores(pca_2d_result, numeric_labels)
    print("\nPCA 2D:")
    print(f"  Silhouette Score: {sil_pca2d:.4f}")
    print(f"  Davies-Bouldin Score: {db_pca2d:.4f}")

    # PCA 3D
    sil_pca3d, db_pca3d = _cluster_scores(pca_3d_result, numeric_labels)
    print("\nPCA 3D:")
    print(f"  Silhouette Score: {sil_pca3d:.4f}")
    print(f"  Davies-Bouldin Score: {db_pca3d:.4f}")

    # t-SNE 2D
    sil_tsne2d, db_tsne2d = _cluster_scores(tsne_2d_result, numeric_labels)
    print("\nt-SNE 2D:")
    print(f"  Silhouette Score: {sil_tsne2d:.4f}")
    print(f"  Davies-Bouldin Score: {db_tsne2d:.4f}")

    # t-SNE 3D
    sil_tsne3d, db_tsne3d = _cluster_scores(tsne_3d_result, numeric_labels)
    print("\nt-SNE 3D:")
    print(f"  Silhouette Score: {sil_tsne3d:.4f}")
    print(f"  Davies-Bouldin Score: {db_tsne3d:.4f}")
else:
    print(
        "\nMetrics skipped: need at least 2 classes and more samples than classes "
        f"(got {n_classes} classes, {n_samples} samples)."
    )

print("\n" + "=" * 60)
print("\nNote: Compare these metrics with Wav2Vec2/Whisper from the main dashboard!")
print("=" * 60)

## Summary

This notebook demonstrated DAC continuous codebook embeddings visualization:

1. ✅ Loaded DAC model and extracted discrete codes
2. ✅ Converted codes to continuous embeddings using codebook lookup
3. ✅ Pooled embeddings across time and codebooks (mean pooling)
4. ✅ Applied PCA and t-SNE in 2D and 3D
5. ✅ Created interactive Plotly visualizations matching the main dashboard style
6. ✅ Computed clustering quality metrics

### Key Findings:
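- DAC embeddings are **8-dimensional** per codebook (9 codebooks total — verify against the `Codes shape` printed in Step 2, since the codebook count depends on the DAC model variant and this notebook loads the `16khz` model)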
- Mean pooling across time and codebooks creates compact representations
- Clustering quality can be compared with Wav2Vec2/Whisper

### Next Steps:
- Integrate DAC into the main Flask dashboard
- Add DAC as a model option alongside Wav2Vec2 and Whisper
- Compare DAC clustering with other models on larger datasets