In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

# Step 1: Create the t-SNE embedding visualization function
def create_large_text_tsne_plot(embedding, sample_indices=None, figsize=(12, 10)):
    """
    Create a t-SNE visualization with large fonts and point sizes.

    Each row of ``embedding`` is drawn as one point (first column on the
    x-axis, second column on the y-axis), colored by its sample index and
    annotated with that index as centered bold text. The figure is displayed
    with ``plt.show()`` and then returned.

    Args:
        embedding: The t-SNE embedding, array-like with shape [n_samples, 2].
            Only the first two columns are plotted.
        sample_indices: Optional indices, one per row of ``embedding``
            (defaults to range(len(embedding))). Used both as the color
            values and as the text labels.
        figsize: Figure size tuple (width, height)

    Returns:
        The matplotlib Figure that the plot was drawn on.

    Raises:
        ValueError: If ``embedding`` is not 2D with at least two columns, or
            if ``sample_indices`` does not have exactly one entry per row.
    """
    # Validate up front, before any pyplot call, so a bad argument fails with
    # a clear message instead of leaving a half-built figure behind.
    embedding = np.asarray(embedding)
    if embedding.ndim != 2 or embedding.shape[1] < 2:
        raise ValueError(
            "embedding must be 2D with at least two columns, "
            f"got shape {embedding.shape}"
        )

    if sample_indices is None:
        sample_indices = np.arange(len(embedding))
    elif len(sample_indices) != len(embedding):
        raise ValueError(
            f"sample_indices has {len(sample_indices)} entries but "
            f"embedding has {len(embedding)} rows"
        )

    # Keep an explicit handle on the Figure. Asking pyplot for the "current"
    # figure after plt.show() is unreliable: if the shown figure is no longer
    # current, plt.gcf() creates and returns a brand-new empty one.
    fig, ax = plt.subplots(figsize=figsize)

    # Scatter plot with larger points, colored by sample index
    scatter = ax.scatter(embedding[:, 0], embedding[:, 1],
                         c=sample_indices, cmap='viridis',
                         s=200,
                         alpha=0.9)

    # Label every point with its index, centered on the marker
    for (x, y), idx in zip(embedding[:, :2], sample_indices):
        ax.annotate(str(idx),
                    (x, y),
                    fontsize=25,
                    fontweight='bold',
                    ha='center',
                    va='center')

    # Colorbar with enlarged label and tick text (label is set once, with its size)
    cbar = fig.colorbar(scatter, ax=ax)
    cbar.set_label("Sample Index", size=18)
    cbar.ax.tick_params(labelsize=16)

    # Enlarged title, axis labels and tick labels
    ax.set_title("Feature Space Visualization (t-SNE)", fontsize=25)
    ax.set_xlabel("Dimension 1", fontsize=25)
    ax.set_ylabel("Dimension 2", fontsize=25)
    ax.tick_params(axis='both', labelsize=20)

    fig.tight_layout()
    plt.show()

    return fig

# Step 2: Report whether feature data is already available in this session
if 'features' in locals():
    print(f"Found features with shape: {features.shape}")
else:
    # A single print with newline separators emits the same four lines
    print(
        "ERROR: No 'features' variable found!",
        "You need to extract features from your model first.",
        "\nExpected format: features should be a 2D array with shape [n_samples, n_features]",
        "Each row represents one sample, each column represents a feature dimension",
        sep="\n",
    )

# Step 3: Fall back to demo features when none have been extracted yet
# (Swap this section for your real feature-extraction code)
if 'features' not in locals():
    # Banner, then a how-to snippet printed verbatim for the reader
    print("\n" + "="*50, "EXAMPLE: How to extract features from your model", "="*50, sep="\n")
    print("""
    # If you have a trained model and data:
    model.eval()
    features = []
    
    with torch.no_grad():
        for i, sample in enumerate(your_data):
            # Get features from model (before final prediction layer)
            feature_vector = model.get_features(sample)  # or similar method
            features.append(feature_vector.cpu().numpy())
    
    features = np.array(features)
    print(f"Extracted features shape: {features.shape}")
    """)

    # Seeded random stand-in data so the remaining steps have something to run on
    print("\nCreating dummy feature data for demonstration...")
    np.random.seed(42)
    n_samples, n_features = 50, 128
    features = np.random.standard_normal((n_samples, n_features))
    print(f"Created dummy features with shape: {features.shape}")

# Step 4: Create the t-SNE embedding from high-dimensional features
if 'features' in locals():
    print("\n" + "="*50)
    print("STEP 4: CREATING T-SNE EMBEDDING")
    print("="*50)

    print(f"Input features shape: {features.shape}")

    if features.shape[0] < 2:
        # The perplexity below is min(30, n_samples - 1); with fewer than two
        # samples that is 0 or negative, which TSNE rejects. Skip the fit and
        # leave 'embedding' undefined so Step 5 reports the failure instead
        # of the whole cell aborting with an exception.
        print("Need at least 2 samples to compute a t-SNE embedding - skipping.")
    else:
        print(f"Reducing from {features.shape[1]} dimensions to 2D for visualization...")

        # Perplexity is capped at n_samples - 1 so small datasets stay valid;
        # random_state is fixed so repeated runs give the same layout.
        tsne = TSNE(n_components=2, random_state=42,
                    perplexity=min(30, features.shape[0] - 1))
        embedding = tsne.fit_transform(features)

        print("t-SNE embedding completed!")
        print(f"Embedding shape: {embedding.shape}")

# Step 5: Plot the embedding (all samples, then a 20-sample subset)
if 'embedding' not in locals():
    print("Could not create embedding - check the steps above for errors")
else:
    print("\n" + "="*50, "STEP 5: CREATING VISUALIZATION", "="*50, sep="\n")

    # Full plot: every sample, labelled with its row position
    fig = create_large_text_tsne_plot(embedding)
    print("t-SNE visualization created successfully!")

    # Optional second plot restricted to the first 20 samples,
    # only drawn when there are more than 20 to begin with
    if len(embedding) > 20:
        print("\nCreating visualization with first 20 samples...")
        selected_embedding = embedding[:20]
        selected_indices = [*range(20)]
        fig2 = create_large_text_tsne_plot(selected_embedding, selected_indices)