In [1]:
!pip install umap-learn
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA, KernelPCA, IncrementalPCA, FactorAnalysis
from sklearn.manifold import LocallyLinearEmbedding, TSNE, Isomap, MDS
import umap
import plotly.express as px
import warnings
warnings.filterwarnings('ignore')

Collecting umap-learn
  Downloading umap_learn-0.5.7-py3-none-any.whl.metadata (21 kB)
Collecting pynndescent>=0.5 (from umap-learn)
  Downloading pynndescent-0.5.13-py3-none-any.whl.metadata (6.8 kB)
Downloading umap_learn-0.5.7-py3-none-any.whl (88 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m88.8/88.8 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pynndescent-0.5.13-py3-none-any.whl (56 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.9/56.9 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pynndescent, umap-learn
Successfully installed pynndescent-0.5.13 umap-learn-0.5.7


In [2]:
def load_and_prepare_data():
    data = load_breast_cancer()
    X = data.data
    y = data.target
    feature_names = data.feature_names

    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    print("Dataset shape:", X.shape)
    print("Features:", feature_names)
    return X_scaled, y, feature_names

X_scaled, y, features = load_and_prepare_data()

Dataset shape: (569, 30)
Features: ['mean radius' 'mean texture' 'mean perimeter' 'mean area'
 'mean smoothness' 'mean compactness' 'mean concavity'
 'mean concave points' 'mean symmetry' 'mean fractal dimension'
 'radius error' 'texture error' 'perimeter error' 'area error'
 'smoothness error' 'compactness error' 'concavity error'
 'concave points error' 'symmetry error' 'fractal dimension error'
 'worst radius' 'worst texture' 'worst perimeter' 'worst area'
 'worst smoothness' 'worst compactness' 'worst concavity'
 'worst concave points' 'worst symmetry' 'worst fractal dimension']


In [3]:
def apply_reduction(X, method, **kwargs):
   print(f"Applying {method.__class__.__name__}...")
   X_reduced = method.fit_transform(X)
   return X_reduced

# Dictionary of all dimensionality reduction methods
reduction_methods = {
   'LLE': LocallyLinearEmbedding(n_components=2, random_state=42),
   't-SNE': TSNE(n_components=2, random_state=42),
   'ISOMAP': Isomap(n_components=2),
   'UMAP': umap.UMAP(random_state=42),
   'MDS': MDS(n_components=2, random_state=42),
   'PCA': PCA(n_components=2, random_state=42),
   'KernelPCA': KernelPCA(n_components=2, kernel='rbf', random_state=42),
   'IncrementalPCA': IncrementalPCA(n_components=2),
   'FactorAnalysis': FactorAnalysis(n_components=2, random_state=42)
}

In [9]:
def plot_reduction(X_reduced, y, title):
    fig = px.scatter(x=X_reduced[:, 0], y=X_reduced[:, 1],
                    color=y.astype(str),
                    title=title,
                    labels={'x': 'Component 1', 'y': 'Component 2'},
                    template='plotly_white')
    fig.show()

# Apply and visualize all methods
results = {}
for name, method in reduction_methods.items():
    results[name] = apply_reduction(X_scaled, method)
    plot_reduction(results[name], y, f"{name} - Breast Cancer Dataset")

Applying LocallyLinearEmbedding...


Applying TSNE...


Applying Isomap...


Applying UMAP...


Applying MDS...


Applying PCA...


Applying KernelPCA...


Applying IncrementalPCA...


Applying FactorAnalysis...


In [5]:
from tensorflow.keras import layers, Model
import tensorflow as tf

class SimpleAutoencoder(Model):
   def __init__(self, input_dim):
       super(SimpleAutoencoder, self).__init__()

       # Encoder
       self.encoder = tf.keras.Sequential([
           layers.Dense(16, activation='relu'),
           layers.Dense(2, name="latent_space")  # 2D latent space
       ])

       # Decoder
       self.decoder = tf.keras.Sequential([
           layers.Dense(16, activation='relu'),
           layers.Dense(input_dim)
       ])

   def call(self, x):
       encoded = self.encoder(x)
       decoded = self.decoder(encoded)
       return decoded

# Train autoencoder
def train_autoencoder(X):
   autoencoder = SimpleAutoencoder(X.shape[1])
   autoencoder.compile(optimizer='adam', loss='mse')

   history = autoencoder.fit(
       X, X,
       epochs=50,
       batch_size=32,
       validation_split=0.2,
       verbose=0
   )

   # Get latent space representation
   latent_space = autoencoder.encoder(X).numpy()
   return latent_space

# Add autoencoder results
results['Autoencoder'] = train_autoencoder(X_scaled)
plot_reduction(results['Autoencoder'], y, "Autoencoder - Breast Cancer Dataset")

In [6]:
from sklearn.metrics import silhouette_score
from time import time

def compare_methods(results, X, y):
    comparison = []

    for name, embedding in results.items():
        # Calculate metrics
        start_time = time()
        if name not in results:  # Recompute only if not already done
            embedding = reduction_methods[name].fit_transform(X)
        compute_time = time() - start_time

        silhouette = silhouette_score(embedding, y)

        comparison.append({
            'Method': name,
            'Computation Time': f"{compute_time:.2f}s",
            'Silhouette Score': f"{silhouette:.3f}"
        })

    # Create comparison DataFrame
    df_comparison = pd.DataFrame(comparison)
    return df_comparison

# Display comparison
comparison_df = compare_methods(results, X_scaled, y)
print("\nMethod Comparison:")
print(comparison_df.to_string(index=False))

# Visualize comparison
fig = px.bar(comparison_df,
             x='Method',
             y='Silhouette Score',
             title='Dimensionality Reduction Methods Comparison')
fig.show()


Method Comparison:
        Method Computation Time Silhouette Score
           LLE            0.00s            0.415
         t-SNE            0.00s            0.493
        ISOMAP            0.00s            0.491
          UMAP            0.00s            0.501
           MDS            0.00s            0.394
           PCA            0.00s            0.438
     KernelPCA            0.00s            0.431
IncrementalPCA            0.00s            0.438
FactorAnalysis            0.00s            0.354
   Autoencoder            0.00s            0.455
