# In [1]:
import numpy as np
import pandas as pd

def pca_music_features(data, k=2):
    """
    Custom implementation of PCA for reducing dimensions of music feature data.

    The data is mean-centered (features are NOT rescaled), the sample covariance
    matrix of the features is eigendecomposed, and the centered data is projected
    onto the k eigenvectors that have the largest eigenvalues.

    Parameters:
    - data: pd.DataFrame or 2-D array-like → each row is a song, columns are
      numeric features (e.g., tempo, energy)
    - k: int → number of principal components to keep (1 <= k <= number of features)

    Returns:
    - reduced_data: array of shape (n_songs, k), the centered data projected onto
      the top-k principal axes
    - principal_axes: array of shape (k, n_features), one eigenvector per row,
      ordered by decreasing eigenvalue
    - variance_explained: array of shape (k,), eigenvalues of the top components

    Raises:
    - ValueError: if data is not 2-D, has fewer than two rows, or k is outside
      the range 1..n_features
    """
    # Convert to NumPy array if input is a DataFrame
    if isinstance(data, pd.DataFrame):
        data = data.values
    # Force a float array so object-dtype input does not break the linear algebra
    data = np.asarray(data, dtype=float)

    if data.ndim != 2:
        raise ValueError(
            f"data must be 2-dimensional (songs x features), got {data.ndim} dimension(s)"
        )

    n_samples, n_features = data.shape
    if n_samples < 2:
        # The sample covariance divides by (n_samples - 1)
        raise ValueError("at least two rows are required to estimate the covariance")
    if not 1 <= k <= n_features:
        # A negative k would otherwise silently slice from the wrong end
        raise ValueError(
            f"k must be between 1 and the number of features ({n_features}), got {k}"
        )

    # Step 1: Centering the data (subtract mean)
    mean_vector = np.mean(data, axis=0)
    centered_data = data - mean_vector

    # Step 2: Calculate covariance matrix
    # np.cov collapses to a 0-d array for a single feature; eigh needs a matrix
    covariance_matrix = np.atleast_2d(np.cov(centered_data, rowvar=False))

    # Step 3: Find eigenvalues and eigenvectors (eigh: the matrix is symmetric)
    eigen_vals, eigen_vecs = np.linalg.eigh(covariance_matrix)

    # Step 4: Sort eigenvectors by decreasing eigenvalues
    sorted_indices = np.argsort(eigen_vals)[::-1]
    top_indices = sorted_indices[:k]
    top_eigen_vecs = eigen_vecs[:, top_indices]
    top_eigen_vals = eigen_vals[top_indices]

    # Step 5: Project the data onto the new space
    reduced_data = np.dot(centered_data, top_eigen_vecs)

    # Eigenvectors are columns of top_eigen_vecs; transpose so each row is one axis
    return reduced_data, top_eigen_vecs.T, top_eigen_vals

# Demo: run the PCA helper on a tiny, hand-made table of song features
if __name__ == "__main__":
    # Four songs, each described by four extracted audio features (one column each)
    feature_columns = {
        'tempo': [110, 125, 130, 115],
        'energy': [0.6, 0.8, 0.75, 0.65],
        'danceability': [0.7, 0.9, 0.85, 0.6],
        'loudness': [-6.0, -4.5, -5.0, -6.5],
    }
    song_table = pd.DataFrame(feature_columns)

    # Keep only the two strongest components
    projected, components, eigenvalues = pca_music_features(song_table, k=2)

    # Print each result under its heading, in the order the function returns them
    report = (
        ("Reduced Data (2D):\n", projected),
        ("Principal Axes:\n", components),
        ("Explained Variance:\n", eigenvalues),
    )
    for heading, values in report:
        print(heading, values)


# Output of the cell above:
#
# Reduced Data (2D):
#  [[ 10.00846089   0.30632883]
#  [ -5.06597661   0.60332725]
#  [-10.00831175  -0.30507363]
#  [  5.06582748  -0.60458245]]
# Principal Axes:
#  [[-0.99667978 -0.00898486 -0.01200428 -0.08002863]
#  [-0.08126757  0.04874921  0.14770632  0.98448054]]
# Explained Variance:
#  [83.8874402   0.30547699]
