In [154]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import NearestNeighbors


def select_cols(df: pd.DataFrame, cols_to_select: list):
    if not set(cols_to_select).issubset(df.columns):
        raise ValueError("Columns to select do not exist in the DataFrame.")
    return df[cols_to_select]


def ohe(df: pd.DataFrame, column: str) -> pd.DataFrame:
    return pd.get_dummies(df[column], prefix=column, dtype='int').reset_index(drop=True)


def create_feature_set(df, float_cols) -> pd.DataFrame:
    scaler = StandardScaler()

    # One-hot Encoding
    key_ohe = ohe(df, 'key')
    mode_ohe = ohe(df, 'mode')

    # Scale audio columns
    floats = df[float_cols].reset_index(drop=True)
    floats_scaled = pd.DataFrame(
        scaler.fit_transform(floats), columns=floats.columns)

    # Concatenate all features
    final = pd.concat([floats_scaled, key_ohe, mode_ohe], axis=1)

    # Add song id and popularity
    final.insert(0, "id", df['id'])
    final['popularity'] = df['popularity']

    return final


def process(df):
    df['mode'] = df['mode'].astype(int)
    df['key'] = df['key'].astype(int)
    floats = ['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness',
              'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms', 'time_signature']

    cols_to_select = ['id'] + floats + ['popularity']
    df = select_cols(df, cols_to_select)
    new_df = create_feature_set(df, floats)
    return new_df.sort_values(by='popularity', ascending=False).reset_index(drop=True)


class KNN():
    def __init__(self, df: pd.DataFrame) -> None:
        self.neigh = NearestNeighbors()
        self.df = df

    def recommend(self, playlist: pd.DataFrame):
        audio_feats = self.df.columns.difference(['id', 'popularity'])

        self.neigh.fit(self.df[audio_feats])

        n_neighbors = self.neigh.kneighbors(
            playlist[audio_feats], n_neighbors=10, return_distance=False)[0]
        return self.df.iloc[n_neighbors]['id'].tolist()


In [155]:
df = pd.read_csv("D:/Laboratory/Study/Monash/FIT3162/Resonance/data/Spotify Top Hits/cleaned_track_data.csv")
df = df[df['error'].isna()]
newdf = process(df)

playlist = pd.read_csv("D:/Laboratory/Study/Monash/FIT3162/Resonance/src/data/features.csv")
playlist = process(playlist)

In [157]:
knn = KNN(newdf)
recs = knn.recommend(playlist)

In [161]:
df[df['id'].isin(recs)]

Unnamed: 0,id,name,images,added_date,release_date,url,artist,popularity,danceability,energy,...,liveness,valence,tempo,type,uri,track_href,analysis_url,duration_ms,time_signature,error
40,46n2EGFnPC3tzWCN1Aqe26,This I Promise You,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",2020-08-07T08:16:37Z,2000-03-21,https://open.spotify.com/track/46n2EGFnPC3tzWC...,6Ff53KvcvAj5U7Z1vojB5o,69,0.55,0.587,...,0.128,0.466,165.975,audio_features,spotify:track:46n2EGFnPC3tzWCN1Aqe26,https://api.spotify.com/v1/tracks/46n2EGFnPC3t...,https://api.spotify.com/v1/audio-analysis/46n2...,284760.0,4.0,
154,2f5N826udWfjT9iomeaBJt,If You're Gone,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",2023-12-01T23:23:15Z,2000-05-23,https://open.spotify.com/track/2f5N826udWfjT9i...,3Ngh2zDBRPEriyxQDAMKd1,66,0.544,0.659,...,0.126,0.361,109.933,audio_features,spotify:track:2f5N826udWfjT9iomeaBJt,https://api.spotify.com/v1/tracks/2f5N826udWfj...,https://api.spotify.com/v1/audio-analysis/2f5N...,274933.0,4.0,
389,5ht9FVBi07F48ZAfIf7WdC,White Flag,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",2020-08-07T08:19:40Z,2003-09-09,https://open.spotify.com/track/5ht9FVBi07F48ZA...,2mpeljBig2IXLXRAFO9AAs,53,0.512,0.525,...,0.081,0.294,169.951,audio_features,spotify:track:5ht9FVBi07F48ZAfIf7WdC,https://api.spotify.com/v1/tracks/5ht9FVBi07F4...,https://api.spotify.com/v1/audio-analysis/5ht9...,240040.0,4.0,
426,7B1QliUMZv7gSTUGAfMRRD,Live Like You Were Dying,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",2020-08-07T08:20:08Z,2004-08-17,https://open.spotify.com/track/7B1QliUMZv7gSTU...,6roFdX1y5BYSbp60OTJWMd,70,0.416,0.546,...,0.0845,0.418,159.929,audio_features,spotify:track:7B1QliUMZv7gSTUGAfMRRD,https://api.spotify.com/v1/tracks/7B1QliUMZv7g...,https://api.spotify.com/v1/audio-analysis/7B1Q...,300333.0,4.0,
895,7JIuqL4ZqkpfGKQhYlrirs,The Only Exception,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",2020-08-07T08:24:02Z,2009-09-28,https://open.spotify.com/track/7JIuqL4ZqkpfGKQ...,74XFHRwlV6OrjEM0A2NCMF,77,0.452,0.563,...,0.143,0.209,137.715,audio_features,spotify:track:7JIuqL4ZqkpfGKQhYlrirs,https://api.spotify.com/v1/tracks/7JIuqL4Zqkpf...,https://api.spotify.com/v1/audio-analysis/7JIu...,267653.0,3.0,
926,57uX2vR9j9DNiANDYfXw1i,Never Say Never,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",2020-08-07T08:24:02Z,2009-02-02,https://open.spotify.com/track/57uX2vR9j9DNiAN...,0zOcE3mg9nS6l3yxt1Y0bK,69,0.23,0.492,...,0.176,0.262,160.139,audio_features,spotify:track:57uX2vR9j9DNiANDYfXw1i,https://api.spotify.com/v1/tracks/57uX2vR9j9DN...,https://api.spotify.com/v1/audio-analysis/57uX...,256613.0,4.0,
1071,6lanRgr6wXibZr8KgzXxBl,A Thousand Years,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",2020-06-16T11:24:49Z,2011-10-18,https://open.spotify.com/track/6lanRgr6wXibZr8...,7H55rcKCfwqkyDFH9wpKM6,84,0.421,0.407,...,0.11,0.161,139.028,audio_features,spotify:track:6lanRgr6wXibZr8KgzXxBl,https://api.spotify.com/v1/tracks/6lanRgr6wXib...,https://api.spotify.com/v1/audio-analysis/6lan...,285120.0,3.0,
1342,1zVhMuH7agsRe6XkljIY4U,human,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",2020-06-12T06:35:58Z,2013-11-18,https://open.spotify.com/track/1zVhMuH7agsRe6X...,7H55rcKCfwqkyDFH9wpKM6,61,0.439,0.489,...,0.114,0.253,143.808,audio_features,spotify:track:1zVhMuH7agsRe6XkljIY4U,https://api.spotify.com/v1/tracks/1zVhMuH7agsR...,https://api.spotify.com/v1/audio-analysis/1zVh...,250707.0,4.0,
1381,0qcr5FMsEO85NAQjrlDRKo,"Let It Go - From ""Frozen""/Soundtrack Version","[{'height': 640, 'url': 'https://i.scdn.co/ima...",2020-06-10T08:53:10Z,2013-01-01,https://open.spotify.com/track/0qcr5FMsEO85NAQ...,73Np75Wv2tju61Eo9Zw4IR,77,0.543,0.485,...,0.12,0.371,136.961,audio_features,spotify:track:0qcr5FMsEO85NAQjrlDRKo,https://api.spotify.com/v1/tracks/0qcr5FMsEO85...,https://api.spotify.com/v1/audio-analysis/0qcr...,223840.0,4.0,
1496,7GgWAITsYJaRM3r50rfh5w,When We Were Young,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",2020-06-20T06:19:41Z,2015-11-20,https://open.spotify.com/track/7GgWAITsYJaRM3r...,4dpARuHxo51G3z768sgnrY,76,0.376,0.595,...,0.095,0.265,144.361,audio_features,spotify:track:7GgWAITsYJaRM3r50rfh5w,https://api.spotify.com/v1/tracks/7GgWAITsYJaR...,https://api.spotify.com/v1/audio-analysis/7GgW...,290900.0,4.0,
