In [6]:
import torch
from torch.profiler import profile, record_function, ProfilerActivity

In [7]:
# Dummy data for PCA
data = torch.rand(1000, 500, device="cuda")  # Example: 1000 samples, 500 features

In [8]:
# Profiling PCA with eigenvalue decomposition
with profile(
    activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
    on_trace_ready=torch.profiler.tensorboard_trace_handler('./log'),  # Save trace
    record_shapes=True,
    with_stack=True
) as prof:
    with record_function("PCA using Eigenvalue Decomposition"):
        # Step 1: Mean subtraction
        mean = torch.mean(data, dim=0)
        centered_data = data - mean

        # Step 2: Compute the covariance matrix
        covariance_matrix = torch.mm(centered_data.T, centered_data) / (data.size(0) - 1)

        # Step 3: Eigen decomposition of the covariance matrix
        eigenvalues, eigenvectors = torch.linalg.eigh(covariance_matrix)

        # Step 4: Sort eigenvalues and eigenvectors in descending order
        sorted_indices = torch.argsort(eigenvalues, descending=True)
        eigenvalues = eigenvalues[sorted_indices]
        eigenvectors = eigenvectors[:, sorted_indices]

        # Step 5: Transform the data using top-k principal components (eigenvectors)
        k = 10  # Example: Reduce to 10 components
        principal_components = eigenvectors[:, :k]
        reduced_data = torch.mm(centered_data, principal_components)


In [9]:
# Print profiling results
print(prof.key_averages().table(sort_by="cuda_time_total", row_limit=10))

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                     PCA using Eigenvalue Decomposition         0.00%       0.000us         0.00%       0.000us       0.000us     182.375ms       404.22%     182.375ms     182.375ms             1  
                     PCA using Eigenvalue Decomposition         0.73%       1.368ms        99.99%     186.140ms     186.140ms       0.000us         0.00%      45.118ms      45.118ms             1  
         

In [10]:
# Profiling PCA with SVD
with profile(
    activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
    on_trace_ready=torch.profiler.tensorboard_trace_handler('./log'),  # Save trace
    record_shapes=True,
    with_stack=True
) as prof:
    with record_function("PCA"):
        # Step 1: Mean subtraction
        mean = torch.mean(data, dim=0)
        centered_data = data - mean

        # Step 2: Covariance matrix computation
        covariance_matrix = torch.mm(centered_data.T, centered_data) / (data.size(0) - 1)

        # Step 3: Eigen decomposition (or SVD for stability)
        U, S, V = torch.svd(covariance_matrix)

        # Step 4: Transform data (optional)
        reduced_data = torch.mm(centered_data, U[:, :10])  # Example: Reduce to 10 components

# Print profiling results
print(prof.key_averages().table(sort_by="cuda_time_total", row_limit=10))

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    PCA         0.00%       0.000us         0.00%       0.000us       0.000us      69.980ms       105.70%      69.980ms      69.980ms             1  
                                                    PCA         3.43%       2.590ms        99.98%      75.590ms      75.590ms       0.000us         0.00%      66.205ms      66.205ms             1  
         