In [5]:
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import NearestNeighbors

# Feature tables to combine. Every file follows the naming pattern
# "id_<feature>_mmsr.tsv", so only the feature part is spelled out here.
file_list = [
    f"id_{feature}_mmsr.tsv"
    for feature in (
        "blf_correlation",
        "blf_deltaspectral",
        "blf_logfluc",
        "blf_spectral",
        "blf_spectralcontrast",
        "blf_vardeltaspectral",
        "incp",
        "ivec256",
        "ivec512",
        "ivec1024",
        "lyrics_tf-idf",
        "lyrics_word2vec",
        "lyrics_bert",
        "mfcc_bow",
        "mfcc_stats",
        "musicnn",
        "resnet",
        "vgg19",
    )
]

def normalize_features(df):
    """Standardize every column of ``df`` with a freshly fitted StandardScaler.

    Args:
        df: DataFrame whose columns are the features to scale.

    Returns:
        A new DataFrame holding the scaler's output, carrying the same index
        and column labels as ``df``.
    """
    scaled_values = StandardScaler().fit_transform(df)
    # fit_transform hands back a bare array, so the labels are re-attached.
    standardized = pd.DataFrame(
        scaled_values,
        index=df.index,
        columns=df.columns,
    )
    return standardized

def load_and_merge_features(file_list, merge_on="id"):
    """Read tab-separated feature files, inner-join them on a key, and normalize.

    Args:
        file_list: Sequence of paths to tab-separated files. The first entry
            seeds the join, so the sequence must not be empty.
        merge_on: Name of the column to join on; it becomes the index of the
            joined table.

    Returns:
        The joined table, indexed by ``merge_on``, after being passed through
        ``normalize_features``.
    """
    combined = pd.read_csv(file_list[0], sep="\t")
    for path in file_list[1:]:
        next_table = pd.read_csv(path, sep="\t")
        # Inner join: only keys present in every file read so far survive.
        combined = combined.merge(next_table, on=merge_on, how="inner")
    indexed = combined.set_index(merge_on)
    return normalize_features(indexed)

def build_knn_recommendations(feature_matrix, top_k=10):
    """Build a dense item-by-item matrix of cosine similarities to nearest neighbours.

    For every row of ``feature_matrix`` the ``top_k`` nearest other rows under
    cosine distance are looked up, and ``1 - distance`` (the cosine
    similarity) is stored at ``[row, neighbour]``. All other entries,
    including the diagonal, stay 0.

    Args:
        feature_matrix: 2-D array with one row per item.
        top_k: Number of neighbours to record per item. ``0`` yields an
            all-zero matrix.

    Returns:
        A ``(n_items, n_items)`` float array. It is dense, so memory grows
        quadratically with the number of items.

    Raises:
        ValueError: Raised by scikit-learn when ``top_k + 1`` exceeds the
            number of rows in ``feature_matrix``.
    """
    knn_model = NearestNeighbors(n_neighbors=top_k + 1, metric="cosine")
    knn_model.fit(feature_matrix)

    n_items = feature_matrix.shape[0]
    recommendation_matrix = np.zeros((n_items, n_items))
    if top_k == 0:
        return recommendation_matrix

    # Querying without X asks for the neighbours of the fitted points
    # themselves and excludes each point from its own result. Slicing off
    # "the first neighbour" instead is only correct when the point is
    # guaranteed to rank first, which does not hold for duplicate or tied
    # rows.
    distances, indices = knn_model.kneighbors(n_neighbors=top_k)

    # Each row index is repeated top_k times so it lines up with the
    # flattened (row-major) neighbour indices and distances.
    row_ids = np.repeat(np.arange(n_items), top_k)
    recommendation_matrix[row_ids, indices.ravel()] = 1 - distances.ravel()

    return recommendation_matrix

def save_recommendation_matrix(matrix, output_path="recs_knn1_10.csv"):
    """Write ``matrix`` to ``output_path`` as tab-delimited text.

    Args:
        matrix: Array to write, one text line per row.
        output_path: Destination file. Note that the default name ends in
            ``.csv`` although the values are separated by tabs.
    """
    np.savetxt(fname=output_path, X=matrix, delimiter="\t")

# Pipeline driver: runs as soon as this cell/module is executed.
# 1) Read every file in file_list, inner-join on "id", and normalize.
merged_features = load_and_merge_features(file_list)
# 2) Build the item-by-item matrix holding cosine similarities to each
#    item's 10 nearest neighbours (rows follow the order of merged_features).
recommendation_matrix = build_knn_recommendations(merged_features.values, top_k=10)
# 3) Write the matrix to the default path "recs_knn1_10.csv" (tab-delimited).
save_recommendation_matrix(recommendation_matrix)