In [5]:
import numpy as np
import pandas as pd

# PCA logic
def pca_music_features(data, k=2):
    """Project samples onto their top-k principal components.

    Args:
        data: 2-D array-like or ``pd.DataFrame`` of numeric values, with one
            row per sample and one column per feature.
        k: Number of principal components to keep. Must satisfy
            ``1 <= k <= number of feature columns``.

    Returns:
        ``np.ndarray`` of shape ``(n_samples, k)`` holding the mean-centered
        data projected onto the k eigenvectors of the covariance matrix with
        the largest eigenvalues. The sign of each component is whatever
        ``np.linalg.eigh`` returns and is not normalized.

    Raises:
        ValueError: If ``data`` is not 2-D, has fewer than two samples, or
            ``k`` is outside ``1..n_features``.
    """
    if isinstance(data, pd.DataFrame):
        data = data.values
    data = np.asarray(data, dtype=float)

    if data.ndim != 2:
        raise ValueError(
            f"data must be 2-D (samples x features), got {data.ndim}-D"
        )
    n_samples, n_features = data.shape
    if n_samples < 2:
        # The sample covariance is undefined (NaN) for a single observation.
        raise ValueError("PCA needs at least two samples")
    if not 1 <= k <= n_features:
        raise ValueError(
            f"k must be between 1 and {n_features} (number of features), got {k}"
        )

    # Center every feature on its mean.
    mean_vector = np.mean(data, axis=0)
    centered_data = data - mean_vector

    # np.cov collapses to a 0-d array when there is a single feature column;
    # atleast_2d keeps it a (1, 1) matrix so eigh accepts it.
    cov_matrix = np.atleast_2d(np.cov(centered_data, rowvar=False))
    eigen_vals, eigen_vecs = np.linalg.eigh(cov_matrix)

    # Order components from largest to smallest eigenvalue and keep the top k.
    sorted_indices = np.argsort(eigen_vals)[::-1]
    top_eigen_vecs = eigen_vecs[:, sorted_indices[:k]]

    # Project the centered samples onto the selected components.
    return np.dot(centered_data, top_eigen_vecs)

# logic for Cosine Similarity
def cosine_similarity(vec1, vec2):
    """Return the cosine of the angle between ``vec1`` and ``vec2``.

    The result is the dot product divided by the product of the two
    Euclidean norms. If either norm is zero the angle is undefined and
    ``0`` is returned instead.
    """
    inner = np.dot(vec1, vec2)
    length_a = np.linalg.norm(vec1)
    length_b = np.linalg.norm(vec2)

    # Guard against division by zero for zero-length vectors.
    if not length_a or not length_b:
        return 0
    return inner / (length_a * length_b)

def get_similar_songs(song_index, feature_matrix, top_n=3):
    """Rank the other rows of ``feature_matrix`` by cosine similarity to one row.

    Args:
        song_index: Index of the reference row. Negative indices count from
            the end, as with normal sequence indexing.
        feature_matrix: Indexable, iterable collection of feature vectors,
            one per song (presumably the 2-D array returned by
            ``pca_music_features`` - confirm against callers).
        top_n: Maximum number of matches to return. Negative values are
            treated as 0; ``None`` returns every other song.

    Returns:
        A list of ``(index, similarity)`` tuples sorted from most to least
        similar, excluding the reference song itself.

    Raises:
        IndexError: If ``song_index`` is out of range for ``feature_matrix``.
    """
    # Index first so that an out-of-range song_index fails immediately.
    target = feature_matrix[song_index]

    # Normalize a negative index; otherwise the `idx != song_index` test below
    # never matches and the song is reported as its own best match.
    if song_index < 0:
        song_index += len(feature_matrix)

    similarities = [
        (idx, cosine_similarity(target, song_vec))
        for idx, song_vec in enumerate(feature_matrix)
        if idx != song_index
    ]

    # Highest similarity first; list.sort is stable, so ties keep index order.
    similarities.sort(key=lambda pair: pair[1], reverse=True)

    # A negative slice bound would drop items from the tail instead of
    # limiting the result, so clamp it to zero.
    if top_n is not None and top_n < 0:
        top_n = 0
    return similarities[:top_n]

# Sample data and a quick end-to-end check.
# Five example songs, each described by four audio features.
music_data = pd.DataFrame(
    {
        'tempo': [120, 130, 125, 110, 135],
        'energy': [0.8, 0.9, 0.75, 0.6, 0.95],
        'danceability': [0.7, 0.85, 0.65, 0.6, 0.9],
        'loudness': [-5.0, -4.2, -6.0, -6.5, -3.8],
    }
)

# Reduce the four features to two principal components.
reduced_data = pca_music_features(music_data, 2)

# Find the three songs closest to song 0 in the reduced space.
similar = get_similar_songs(0, reduced_data, 3)

# Report the matches, best first.
print("Top similar songs to song 0:")
for idx, score in similar:
    print(f"Song {idx} → Similarity: {score:.3f}")


Top similar songs to song 0:
Song 3 → Similarity: 0.992
Song 2 → Similarity: -0.757
Song 1 → Similarity: -0.985
